Message ID | 20220927155332.10762-3-andriy.shevchenko@linux.intel.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 7a84e7353e23202d4f82b05093af4db2b26e6768 |
Headers | show |
Series | usb: dwc3: revert OTG changes for Intel Merrifield | expand |
On Tue, Sep 27, 2022 at 8:53 AM Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. > > As pointed out by Ferry this breaks Dual Role support on > Intel Merrifield platforms. > > Fixes: 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") > Reported-by: Ferry Toth <fntoth@gmail.com> > Cc: stable@vger.kernel.org > Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> > Tested-by: Ferry Toth <fntoth@gmail.com> # for Merrifield Sven can you check that this also fixes the regression of your fix? > --- > drivers/usb/dwc3/core.c | 55 +---------------------------------------- > drivers/usb/dwc3/drd.c | 50 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 51 insertions(+), 54 deletions(-) > > diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c > index c2b463469d51..219d797e2230 100644 > --- a/drivers/usb/dwc3/core.c > +++ b/drivers/usb/dwc3/core.c > @@ -23,7 +23,6 @@ > #include <linux/delay.h> > #include <linux/dma-mapping.h> > #include <linux/of.h> > -#include <linux/of_graph.h> > #include <linux/acpi.h> > #include <linux/pinctrl/consumer.h> > #include <linux/reset.h> > @@ -86,7 +85,7 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc) > * mode. If the controller supports DRD but the dr_mode is not > * specified or set to OTG, then set the mode to peripheral. > */ > - if (mode == USB_DR_MODE_OTG && !dwc->edev && > + if (mode == USB_DR_MODE_OTG && > (!IS_ENABLED(CONFIG_USB_ROLE_SWITCH) || > !device_property_read_bool(dwc->dev, "usb-role-switch")) && > !DWC3_VER_IS_PRIOR(DWC3, 330A)) > @@ -1668,51 +1667,6 @@ static void dwc3_check_params(struct dwc3 *dwc) > } > } > > -static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc) > -{ > - struct device *dev = dwc->dev; > - struct device_node *np_phy; > - struct extcon_dev *edev = NULL; > - const char *name; > - > - if (device_property_read_bool(dev, "extcon")) > - return extcon_get_edev_by_phandle(dev, 0); > - > - /* > - * Device tree platforms should get extcon via phandle. > - * On ACPI platforms, we get the name from a device property. > - * This device property is for kernel internal use only and > - * is expected to be set by the glue code. > - */ > - if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) { > - edev = extcon_get_extcon_dev(name); > - if (!edev) > - return ERR_PTR(-EPROBE_DEFER); > - > - return edev; > - } > - > - /* > - * Try to get an extcon device from the USB PHY controller's "port" > - * node. Check if it has the "port" node first, to avoid printing the > - * error message from underlying code, as it's a valid case: extcon > - * device (and "port" node) may be missing in case of "usb-role-switch" > - * or OTG mode. > - */ > - np_phy = of_parse_phandle(dev->of_node, "phys", 0); > - if (of_graph_is_present(np_phy)) { > - struct device_node *np_conn; > - > - np_conn = of_graph_get_remote_node(np_phy, -1, -1); > - if (np_conn) > - edev = extcon_find_edev_by_node(np_conn); > - of_node_put(np_conn); > - } > - of_node_put(np_phy); > - > - return edev; > -} > - > static int dwc3_probe(struct platform_device *pdev) > { > struct device *dev = &pdev->dev; > @@ -1849,13 +1803,6 @@ static int dwc3_probe(struct platform_device *pdev) > goto err2; > } > > - dwc->edev = dwc3_get_extcon(dwc); > - if (IS_ERR(dwc->edev)) { > - ret = PTR_ERR(dwc->edev); > - dev_err_probe(dwc->dev, ret, "failed to get extcon\n"); > - goto err3; > - } > - > ret = dwc3_get_dr_mode(dwc); > if (ret) > goto err3; > diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c > index 039bf241769a..8cad9e7d3368 100644 > --- a/drivers/usb/dwc3/drd.c > +++ b/drivers/usb/dwc3/drd.c > @@ -8,6 +8,7 @@ > */ > > #include <linux/extcon.h> > +#include <linux/of_graph.h> > #include <linux/of_platform.h> > #include <linux/platform_device.h> > #include <linux/property.h> > @@ -438,6 +439,51 @@ static int dwc3_drd_notifier(struct notifier_block *nb, > return NOTIFY_DONE; > } > > +static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc) > +{ > + struct device *dev = dwc->dev; > + struct device_node *np_phy; > + struct extcon_dev *edev = NULL; > + const char *name; > + > + if (device_property_read_bool(dev, "extcon")) > + return extcon_get_edev_by_phandle(dev, 0); > + > + /* > + * Device tree platforms should get extcon via phandle. > + * On ACPI platforms, we get the name from a device property. > + * This device property is for kernel internal use only and > + * is expected to be set by the glue code. > + */ > + if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) { > + edev = extcon_get_extcon_dev(name); > + if (!edev) > + return ERR_PTR(-EPROBE_DEFER); > + > + return edev; > + } > + > + /* > + * Try to get an extcon device from the USB PHY controller's "port" > + * node. Check if it has the "port" node first, to avoid printing the > + * error message from underlying code, as it's a valid case: extcon > + * device (and "port" node) may be missing in case of "usb-role-switch" > + * or OTG mode. > + */ > + np_phy = of_parse_phandle(dev->of_node, "phys", 0); > + if (of_graph_is_present(np_phy)) { > + struct device_node *np_conn; > + > + np_conn = of_graph_get_remote_node(np_phy, -1, -1); > + if (np_conn) > + edev = extcon_find_edev_by_node(np_conn); > + of_node_put(np_conn); > + } > + of_node_put(np_phy); > + > + return edev; > +} > + > #if IS_ENABLED(CONFIG_USB_ROLE_SWITCH) > #define ROLE_SWITCH 1 > static int dwc3_usb_role_switch_set(struct usb_role_switch *sw, > @@ -542,6 +588,10 @@ int dwc3_drd_init(struct dwc3 *dwc) > device_property_read_bool(dwc->dev, "usb-role-switch")) > return dwc3_setup_role_switch(dwc); > > + dwc->edev = dwc3_get_extcon(dwc); > + if (IS_ERR(dwc->edev)) > + return PTR_ERR(dwc->edev); > + > if (dwc->edev) { > dwc->edev_nb.notifier_call = dwc3_drd_notifier; > ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST, > -- > 2.35.1 >
On Thu, Sep 29, 2022, at 05:01, Andrey Smirnov wrote: > On Tue, Sep 27, 2022 at 8:53 AM Andy Shevchenko > <andriy.shevchenko@linux.intel.com> wrote: >> >> This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. >> >> As pointed out by Ferry this breaks Dual Role support on >> Intel Merrifield platforms. >> >> Fixes: 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") >> Reported-by: Ferry Toth <fntoth@gmail.com> >> Cc: stable@vger.kernel.org >> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> >> Tested-by: Ferry Toth <fntoth@gmail.com> # for Merrifield > > Sven can you check that this also fixes the regression of your fix? I'll only have time to test this on the weekend but the fix looks good to me. Reviewed-by: Sven Peter <sven@svenpeter.dev> Sven
Hi Andy, On Tue, Sep 27, 2022, Andy Shevchenko wrote: > This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. > > As pointed out by Ferry this breaks Dual Role support on > Intel Merrifield platforms. Can you provide more info than this (any debug info/description)? It will be difficult to come back to fix with just this to go on. The reverted patch was needed to fix a different issue. Thanks, Thinh > > Fixes: 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") > Reported-by: Ferry Toth <fntoth@gmail.com> > Cc: stable@vger.kernel.org > Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> > Tested-by: Ferry Toth <fntoth@gmail.com> # for Merrifield > --- > drivers/usb/dwc3/core.c | 55 +---------------------------------------- > drivers/usb/dwc3/drd.c | 50 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 51 insertions(+), 54 deletions(-)
On Mon, Oct 03, 2022 at 09:57:35PM +0000, Thinh Nguyen wrote: > On Tue, Sep 27, 2022, Andy Shevchenko wrote: > > This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. > > > > As pointed out by Ferry this breaks Dual Role support on > > Intel Merrifield platforms. > > Can you provide more info than this (any debug info/description)? It > will be difficult to come back to fix with just this to go on. The > reverted patch was needed to fix a different issue. It's already applied, but Ferry probably can provide more info if you describe step-by-step instructions. (I'm still unable to test this particular type of features since remove access is always in host mode.)
Hi Op 04-10-2022 om 10:28 schreef Andy Shevchenko: > On Mon, Oct 03, 2022 at 09:57:35PM +0000, Thinh Nguyen wrote: >> On Tue, Sep 27, 2022, Andy Shevchenko wrote: >>> This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. >>> >>> As pointed out by Ferry this breaks Dual Role support on >>> Intel Merrifield platforms. >> Can you provide more info than this (any debug info/description)? It >> will be difficult to come back to fix with just this to go on. The >> reverted patch was needed to fix a different issue. On Merrifield we have a switch with extcon driver to switch between host and device mode. Now with commit 0f01017, device mode works. In host mode the root hub appears, but no devices appear. In the logs there are no messages from tusb1210, but there should be because lately there normally are (harmless) error messages. Nothing in the logs point in the direction of tusb1210 not being probed. The discussion is here: https://lkml.org/lkml/2022/9/24/237 I tried moving some code as suggested without result: https://lkml.org/lkml/2022/9/24/434 And with success: https://lkml.org/lkml/2022/9/25/285 So, as Andrey Smirnov writes "I think we'd want to figure out why the ordering is important if we want to justify the above fix." > It's already applied, but Ferry probably can provide more info if you describe > step-by-step instructions. (I'm still unable to test this particular type of > features since remove access is always in host mode.) > I'd be happy to test.
On Tue, Oct 04, 2022, Ferry Toth wrote: > Hi > > Op 04-10-2022 om 10:28 schreef Andy Shevchenko: > > On Mon, Oct 03, 2022 at 09:57:35PM +0000, Thinh Nguyen wrote: > > > On Tue, Sep 27, 2022, Andy Shevchenko wrote: > > > > This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. > > > > > > > > As pointed out by Ferry this breaks Dual Role support on > > > > Intel Merrifield platforms. > > > Can you provide more info than this (any debug info/description)? It > > > will be difficult to come back to fix with just this to go on. The > > > reverted patch was needed to fix a different issue. > > On Merrifield we have a switch with extcon driver to switch between host and > device mode. Now with commit 0f01017, device mode works. In host mode the > root hub appears, but no devices appear. In the logs there are no messages > from tusb1210, but there should be because lately there normally are > (harmless) error messages. Nothing in the logs point in the direction of > tusb1210 not being probed. > > The discussion is here: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/237__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_IZbroEM$ > > I tried moving some code as suggested without result: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/434__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_boaK8Qw$ > > And with success: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/25/285__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_MbbbZII$ > > So, as Andrey Smirnov writes "I think we'd want to figure out why the > ordering is important if we want to justify the above fix." > > > It's already applied, but Ferry probably can provide more info if you describe > > step-by-step instructions. (I'm still unable to test this particular type of > > features since remove access is always in host mode.) > > > I'd be happy to test. Thanks! Does the failure only happen the first time host is initialized? Or can it recover after switching to device then back to host mode? Probably the failure happens if some step(s) in dwc3_core_init() hasn't completed. tusb1210 is a phy driver right? The issue is probably because we didn't initialize the phy yet. So, I suspect placing dwc3_get_extcon() after initializing the phy will probably solve the dependency problem. You can try something for yourself or I can provide something to test later if you don't mind (maybe next week if it's ok). Thanks, Thinh
On Tue, Oct 4, 2022 at 7:12 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote: > > On Tue, Oct 04, 2022, Ferry Toth wrote: > > Hi > > > > Op 04-10-2022 om 10:28 schreef Andy Shevchenko: > > > On Mon, Oct 03, 2022 at 09:57:35PM +0000, Thinh Nguyen wrote: > > > > On Tue, Sep 27, 2022, Andy Shevchenko wrote: > > > > > This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. > > > > > > > > > > As pointed out by Ferry this breaks Dual Role support on > > > > > Intel Merrifield platforms. > > > > Can you provide more info than this (any debug info/description)? It > > > > will be difficult to come back to fix with just this to go on. The > > > > reverted patch was needed to fix a different issue. > > > > On Merrifield we have a switch with extcon driver to switch between host and > > device mode. Now with commit 0f01017, device mode works. In host mode the > > root hub appears, but no devices appear. In the logs there are no messages > > from tusb1210, but there should be because lately there normally are > > (harmless) error messages. Nothing in the logs point in the direction of > > tusb1210 not being probed. > > > > The discussion is here: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/237__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_IZbroEM$ > > > > I tried moving some code as suggested without result: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/434__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_boaK8Qw$ > > > > And with success: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/25/285__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_MbbbZII$ > > > > So, as Andrey Smirnov writes "I think we'd want to figure out why the > > ordering is important if we want to justify the above fix." > > > > > It's already applied, but Ferry probably can provide more info if you describe > > > step-by-step instructions. (I'm still unable to test this particular type of > > > features since remove access is always in host mode.) > > > > > I'd be happy to test. > > Thanks! > > Does the failure only happen the first time host is initialized? Or can > it recover after switching to device then back to host mode? > > Probably the failure happens if some step(s) in dwc3_core_init() hasn't > completed. > > tusb1210 is a phy driver right? The issue is probably because we didn't > initialize the phy yet. So, I suspect placing dwc3_get_extcon() after > initializing the phy will probably solve the dependency problem. > > You can try something for yourself or I can provide something to test > later if you don't mind (maybe next week if it's ok). FWIW, I just got the same HW Ferry has last week and am planning to work on this over the weekend.
On Wed, Oct 05, 2022 at 10:10:42AM +0200, Ferry Toth wrote: > On 05-10-2022 04:39, Andrey Smirnov wrote: > > On Tue, Oct 4, 2022 at 7:12 PM Thinh Nguyen<Thinh.Nguyen@synopsys.com> wrote: ... > > FWIW, I just got the same HW Ferry has last week and am planning to > > work on this over the weekend. > I can help you setup, we have binary images available on github as well as > Yocto recipies to build them. Also you can build all components (U-Boot, kernel, Buildroot initrd) separately as explained here: https://edison-fw.github.io/edison-wiki/u-boot-update https://edison-fw.github.io/edison-wiki/vanilla https://github.com/andy-shev/buildroot/tree/intel/board/intel/common
Hi On 05-10-2022 04:39, Andrey Smirnov wrote: > On Tue, Oct 4, 2022 at 7:12 PM Thinh Nguyen<Thinh.Nguyen@synopsys.com> wrote: >> On Tue, Oct 04, 2022, Ferry Toth wrote: >>> Hi >>> >>> Op 04-10-2022 om 10:28 schreef Andy Shevchenko: >>>> On Mon, Oct 03, 2022 at 09:57:35PM +0000, Thinh Nguyen wrote: >>>>> On Tue, Sep 27, 2022, Andy Shevchenko wrote: >>>>>> This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. >>>>>> >>>>>> As pointed out by Ferry this breaks Dual Role support on >>>>>> Intel Merrifield platforms. >>>>> Can you provide more info than this (any debug info/description)? It >>>>> will be difficult to come back to fix with just this to go on. The >>>>> reverted patch was needed to fix a different issue. >>> On Merrifield we have a switch with extcon driver to switch between host and >>> device mode. Now with commit 0f01017, device mode works. In host mode the >>> root hub appears, but no devices appear. In the logs there are no messages >>> from tusb1210, but there should be because lately there normally are >>> (harmless) error messages. Nothing in the logs point in the direction of >>> tusb1210 not being probed. >>> >>> The discussion is here:https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/237__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_IZbroEM$ >>> >>> I tried moving some code as suggested without result:https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/434__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_boaK8Qw$ >>> >>> And with success:https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/25/285__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_MbbbZII$ >>> >>> So, as Andrey Smirnov writes "I think we'd want to figure out why the >>> ordering is important if we want to justify the above fix." >>> >>>> It's already applied, but Ferry probably can provide more info if you describe >>>> step-by-step instructions. (I'm still unable to test this particular type of >>>> features since remove access is always in host mode.) >>>> >>> I'd be happy to test. >> Thanks! >> >> Does the failure only happen the first time host is initialized? Or can >> it recover after switching to device then back to host mode? >> >> Probably the failure happens if some step(s) in dwc3_core_init() hasn't >> completed. >> >> tusb1210 is a phy driver right? The issue is probably because we didn't >> initialize the phy yet. So, I suspect placing dwc3_get_extcon() after >> initializing the phy will probably solve the dependency problem. >> >> You can try something for yourself or I can provide something to test >> later if you don't mind (maybe next week if it's ok). > FWIW, I just got the same HW Ferry has last week and am planning to > work on this over the weekend. I can help you setup, we have binary images available on github as well as Yocto recipies to build them.
On Tue, Oct 04, 2022, Andrey Smirnov wrote: > On Tue, Oct 4, 2022 at 7:12 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote: > > > > On Tue, Oct 04, 2022, Ferry Toth wrote: > > > Hi > > > > > > Op 04-10-2022 om 10:28 schreef Andy Shevchenko: > > > > On Mon, Oct 03, 2022 at 09:57:35PM +0000, Thinh Nguyen wrote: > > > > > On Tue, Sep 27, 2022, Andy Shevchenko wrote: > > > > > > This reverts commit 0f01017191384e3962fa31520a9fd9846c3d352f. > > > > > > > > > > > > As pointed out by Ferry this breaks Dual Role support on > > > > > > Intel Merrifield platforms. > > > > > Can you provide more info than this (any debug info/description)? It > > > > > will be difficult to come back to fix with just this to go on. The > > > > > reverted patch was needed to fix a different issue. > > > > > > On Merrifield we have a switch with extcon driver to switch between host and > > > device mode. Now with commit 0f01017, device mode works. In host mode the > > > root hub appears, but no devices appear. In the logs there are no messages > > > from tusb1210, but there should be because lately there normally are > > > (harmless) error messages. Nothing in the logs point in the direction of > > > tusb1210 not being probed. > > > > > > The discussion is here: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/237__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_IZbroEM$ > > > > > > I tried moving some code as suggested without result: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/24/434__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_boaK8Qw$ > > > > > > And with success: https://urldefense.com/v3/__https://lkml.org/lkml/2022/9/25/285__;!!A4F2R9G_pg!avfDjiGwi8xu0grHYrQQTZEEUnmaKu82xxdty0ZltxyU8BkoFD6AMq4a-7STYiKxNQpdYXgP1QG_MbbbZII$ > > > > > > So, as Andrey Smirnov writes "I think we'd want to figure out why the > > > ordering is important if we want to justify the above fix." > > > > > > > It's already applied, but Ferry probably can provide more info if you describe > > > > step-by-step instructions. (I'm still unable to test this particular type of > > > > features since remove access is always in host mode.) > > > > > > > I'd be happy to test. > > > > Thanks! > > > > Does the failure only happen the first time host is initialized? Or can > > it recover after switching to device then back to host mode? > > > > Probably the failure happens if some step(s) in dwc3_core_init() hasn't > > completed. > > > > tusb1210 is a phy driver right? The issue is probably because we didn't > > initialize the phy yet. So, I suspect placing dwc3_get_extcon() after > > initializing the phy will probably solve the dependency problem. > > > > You can try something for yourself or I can provide something to test > > later if you don't mind (maybe next week if it's ok). > > FWIW, I just got the same HW Ferry has last week and am planning to > work on this over the weekend. That'd be great. Thanks! Thinh
On Wed, Oct 05, 2022, Ferry Toth wrote: > Hi, > > Thanks! > > Does the failure only happen the first time host is initialized? Or can > it recover after switching to device then back to host mode? > > I can switch back and forth and device mode works each time, host mode remains > dead. Ok. > > Probably the failure happens if some step(s) in dwc3_core_init() hasn't > completed. > > tusb1210 is a phy driver right? The issue is probably because we didn't > initialize the phy yet. So, I suspect placing dwc3_get_extcon() after > initializing the phy will probably solve the dependency problem. > > You can try something for yourself or I can provide something to test > later if you don't mind (maybe next week if it's ok). > > Yes, the code move I mentioned above "moves dwc3_get_extcon() until after > dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU initially > dwc3_get_extcon() was called from within dwc3_core_init_mode() but only for > case USB_DR_MODE_OTG. So with this change order of events is more or less > unchanged" solves the issue. > I saw the experiment you did from the link you provided. We want to also confirm exactly which step in dwc3_core_init() was needed. Thanks, Thinh
Hi On 06-10-2022 04:12, Thinh Nguyen wrote: > On Wed, Oct 05, 2022, Ferry Toth wrote: >> Hi, >> >> Thanks! >> >> Does the failure only happen the first time host is initialized? Or can >> it recover after switching to device then back to host mode? >> >> I can switch back and forth and device mode works each time, host mode remains >> dead. > Ok. > >> Probably the failure happens if some step(s) in dwc3_core_init() hasn't >> completed. >> >> tusb1210 is a phy driver right? The issue is probably because we didn't >> initialize the phy yet. So, I suspect placing dwc3_get_extcon() after >> initializing the phy will probably solve the dependency problem. >> >> You can try something for yourself or I can provide something to test >> later if you don't mind (maybe next week if it's ok). >> >> Yes, the code move I mentioned above "moves dwc3_get_extcon() until after >> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU initially >> dwc3_get_extcon() was called from within dwc3_core_init_mode() but only for >> case USB_DR_MODE_OTG. So with this change order of events is more or less >> unchanged" solves the issue. >> > I saw the experiment you did from the link you provided. We want to also > confirm exactly which step in dwc3_core_init() was needed. Ok. I first tried the code move suggested by Andrey (didn't work). Then after reading the actual code I moved a bit further. This move was on top of -rc6 without any reverts. I did not make additional changes to dwc3_core_init() So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - dwc3_core_init - .. - dwc3_core_init_mode (not working) I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - .. - dwc3_core_init_mode (no change) Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - dwc3_core_init_mode (works) .. are what I believe for this issue irrelevant calls to dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. > > Thanks, > Thinh
On Thu, Oct 06, 2022, Ferry Toth wrote: > Hi > > On 06-10-2022 04:12, Thinh Nguyen wrote: > > On Wed, Oct 05, 2022, Ferry Toth wrote: > > > Hi, > > > > > > Thanks! > > > > > > Does the failure only happen the first time host is initialized? Or can > > > it recover after switching to device then back to host mode? > > > > > > I can switch back and forth and device mode works each time, host mode remains > > > dead. > > Ok. > > > > > Probably the failure happens if some step(s) in dwc3_core_init() hasn't > > > completed. > > > > > > tusb1210 is a phy driver right? The issue is probably because we didn't > > > initialize the phy yet. So, I suspect placing dwc3_get_extcon() after > > > initializing the phy will probably solve the dependency problem. > > > > > > You can try something for yourself or I can provide something to test > > > later if you don't mind (maybe next week if it's ok). > > > > > > Yes, the code move I mentioned above "moves dwc3_get_extcon() until after > > > dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU initially > > > dwc3_get_extcon() was called from within dwc3_core_init_mode() but only for > > > case USB_DR_MODE_OTG. So with this change order of events is more or less > > > unchanged" solves the issue. > > > > > I saw the experiment you did from the link you provided. We want to also > > confirm exactly which step in dwc3_core_init() was needed. > > Ok. I first tried the code move suggested by Andrey (didn't work). Then > after reading the actual code I moved a bit further. > > This move was on top of -rc6 without any reverts. I did not make additional > changes to dwc3_core_init() > > So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > dwc3_core_init - .. - dwc3_core_init_mode (not working) > > I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - .. > - dwc3_core_init_mode (no change) > > Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - > dwc3_core_init_mode (works) > > .. are what I believe for this issue irrelevant calls to > dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. > Right. Thanks for narrowing it down. There are still many steps in dwc3_core_init(). We have some suspicion, but we still haven't confirmed the exact cause of the failure. We can write a proper patch once we know the reason. Thanks, Thinh
On 07-10-2022 04:11, Thinh Nguyen wrote: > On Thu, Oct 06, 2022, Ferry Toth wrote: >> Hi >> >> On 06-10-2022 04:12, Thinh Nguyen wrote: >>> On Wed, Oct 05, 2022, Ferry Toth wrote: >>>> Hi, >>>> >>>> Thanks! >>>> >>>> Does the failure only happen the first time host is initialized? Or can >>>> it recover after switching to device then back to host mode? >>>> >>>> I can switch back and forth and device mode works each time, host mode remains >>>> dead. >>> Ok. >>> >>>> Probably the failure happens if some step(s) in dwc3_core_init() hasn't >>>> completed. >>>> >>>> tusb1210 is a phy driver right? The issue is probably because we didn't >>>> initialize the phy yet. So, I suspect placing dwc3_get_extcon() after >>>> initializing the phy will probably solve the dependency problem. >>>> >>>> You can try something for yourself or I can provide something to test >>>> later if you don't mind (maybe next week if it's ok). >>>> >>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() until after >>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU initially >>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() but only for >>>> case USB_DR_MODE_OTG. So with this change order of events is more or less >>>> unchanged" solves the issue. >>>> >>> I saw the experiment you did from the link you provided. We want to also >>> confirm exactly which step in dwc3_core_init() was needed. >> Ok. I first tried the code move suggested by Andrey (didn't work). Then >> after reading the actual code I moved a bit further. >> >> This move was on top of -rc6 without any reverts. I did not make additional >> changes to dwc3_core_init() >> >> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >> dwc3_core_init - .. - dwc3_core_init_mode (not working) >> >> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - .. >> - dwc3_core_init_mode (no change) >> >> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - >> dwc3_core_init_mode (works) >> >> .. are what I believe for this issue irrelevant calls to >> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. >> > Right. Thanks for narrowing it down. There are still many steps in > dwc3_core_init(). We have some suspicion, but we still haven't confirmed > the exact cause of the failure. We can write a proper patch once we know > the reason. If you would like me to test your suspicion, just tell me what to do :-) > > Thanks, > Thinh
On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > > > On 07-10-2022 04:11, Thinh Nguyen wrote: > > On Thu, Oct 06, 2022, Ferry Toth wrote: > >> Hi > >> > >> On 06-10-2022 04:12, Thinh Nguyen wrote: > >>> On Wed, Oct 05, 2022, Ferry Toth wrote: > >>>> Hi, > >>>> > >>>> Thanks! > >>>> > >>>> Does the failure only happen the first time host is initialized? Or can > >>>> it recover after switching to device then back to host mode? > >>>> > >>>> I can switch back and forth and device mode works each time, host mode remains > >>>> dead. > >>> Ok. > >>> > >>>> Probably the failure happens if some step(s) in dwc3_core_init() hasn't > >>>> completed. > >>>> > >>>> tusb1210 is a phy driver right? The issue is probably because we didn't > >>>> initialize the phy yet. So, I suspect placing dwc3_get_extcon() after > >>>> initializing the phy will probably solve the dependency problem. > >>>> > >>>> You can try something for yourself or I can provide something to test > >>>> later if you don't mind (maybe next week if it's ok). > >>>> > >>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() until after > >>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU initially > >>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() but only for > >>>> case USB_DR_MODE_OTG. So with this change order of events is more or less > >>>> unchanged" solves the issue. > >>>> > >>> I saw the experiment you did from the link you provided. We want to also > >>> confirm exactly which step in dwc3_core_init() was needed. > >> Ok. I first tried the code move suggested by Andrey (didn't work). Then > >> after reading the actual code I moved a bit further. > >> > >> This move was on top of -rc6 without any reverts. I did not make additional > >> changes to dwc3_core_init() > >> > >> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > >> dwc3_core_init - .. - dwc3_core_init_mode (not working) > >> > >> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - .. > >> - dwc3_core_init_mode (no change) > >> > >> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - > >> dwc3_core_init_mode (works) > >> > >> .. are what I believe for this issue irrelevant calls to > >> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. > >> > > Right. Thanks for narrowing it down. There are still many steps in > > dwc3_core_init(). We have some suspicion, but we still haven't confirmed > > the exact cause of the failure. We can write a proper patch once we know > > the reason. > If you would like me to test your suspicion, just tell me what to do :-) OK, Ferry, I think I'm going to need clarification on specifics on your test setup. Can you share your kernel config, maybe your "/proc/config.gz", somewhere? When you say you are running vanilla Linux, do you mean it or do you mean vanilla tree + some patch delta? The reason I'm asking is because I'm having a hard time reproducing the problem on my end. In fact, when I build v6.0 (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) I get an infinite loop of reprobing that looks something like (some debug tracing, function name + line number, included): [ 6.160732] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 [ 6.172299] XXXXXXXXXXX: dwc3_probe 1834 [ 6.172426] XXXXXXXXXXX: dwc3_core_init_mode 1386 [ 6.176391] XXXXXXXXXXX: dwc3_drd_init 593 [ 6.181573] dwc3 dwc3.0.auto: Driver dwc3 requests probe deferral [ 6.191886] platform dwc3.0.auto: Added to deferred list [ 6.197249] platform dwc3.0.auto: Retrying from deferred list [ 6.203057] bus: 'platform': __driver_probe_device: matched device dwc3.0.auto with driver dwc3 [ 6.211783] bus: 'platform': really_probe: probing driver dwc3 with device dwc3.0.auto [ 6.219935] XXXXXXXXXXX: dwc3_probe 1822 [ 6.219952] XXXXXXXXXXX: dwc3_core_init 1092 [ 6.223903] XXXXXXXXXXX: dwc3_core_init 1095 [ 6.234839] bus: 'ulpi': __driver_probe_device: matched device dwc3.0.auto.ulpi with driver tusb1210 [ 6.248335] bus: 'ulpi': really_probe: probing driver tusb1210 with device dwc3.0.auto.ulpi [ 6.257039] driver: 'tusb1210': driver_bound: bound to device 'dwc3.0.auto.ulpi' [ 6.264501] bus: 'ulpi': really_probe: bound device dwc3.0.auto.ulpi to driver tusb1210 [ 6.272553] debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' already present! [ 6.279978] XXXXXXXXXXX: dwc3_core_init 1099 [ 6.279991] XXXXXXXXXXX: dwc3_core_init 1103 [ 6.345769] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 [ 6.357316] XXXXXXXXXXX: dwc3_probe 1834 [ 6.357447] XXXXXXXXXXX: dwc3_core_init_mode 1386 [ 6.361402] XXXXXXXXXXX: dwc3_drd_init 593 [ 6.366589] dwc3 dwc3.0.auto: Driver dwc3 requests probe deferral [ 6.376901] platform dwc3.0.auto: Added to deferred list which renders the system completely unusable, but USB host is definitely going to be broken too. Now, ironically, with my patch in-place, an attempt to probe extcon that ends up deferring the probe happens before the ULPI driver failure (which wasn't failing driver probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), there no "driver binding" event that re-triggers deferred probe causing the loop, so the system progresses to a point where extcon is available and dwc3 driver eventually loads. After that, and I don't know if I'm doing the same test, USB host seems to work as expected. lsusb works, my USB stick enumerates as expected. Switching the USB mux to micro-USB and back shuts the host functionality down and brings it up as expected. Now I didn't try to load any gadgets to make sure USB gadget works 100%, but since you were saying it was USB host that was broken, I wasn't concerned with that. Am I doing the right test? For the reference what I test with is: - vanilla kernel, no patch delta (sans minor debug tracing) + initrd built with Buildroot 2022.08.1 - Initrd is using systemd (don't think that really matters, but who knows) - U-Boot 2022.04 (built with Buildroot as well) - kernel config is x86_64_defconfig + whatever I gathered from *.cfg files in https://github.com/edison-fw/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files
On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: Thank you for the testing on your side! ... > OK, Ferry, I think I'm going to need clarification on specifics on > your test setup. Can you share your kernel config, maybe your > "/proc/config.gz", somewhere? When you say you are running vanilla > Linux, do you mean it or do you mean vanilla tree + some patch delta? > > The reason I'm asking is because I'm having a hard time reproducing > the problem on my end. In fact, when I build v6.0 > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > I get an infinite loop of reprobing that looks something like (some > debug tracing, function name + line number, included): Yes, this is (one of) known drawback(s) of deferred probe hack. I think the kernel that Ferry runs has a patch that basically reverts one from 2014 [1] and allows to have extcon as a module. (1) [1]: 58b116bce136 ("drivercore: deferral race condition fix") > which renders the system completely unusable, but USB host is > definitely going to be broken too. Now, ironically, with my patch > in-place, an attempt to probe extcon that ends up deferring the probe > happens before the ULPI driver failure (which wasn't failing driver > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > there no "driver binding" event that re-triggers deferred probe > causing the loop, so the system progresses to a point where extcon is > available and dwc3 driver eventually loads. > > After that, and I don't know if I'm doing the same test, USB host > seems to work as expected. lsusb works, my USB stick enumerates as > expected. Switching the USB mux to micro-USB and back shuts the host > functionality down and brings it up as expected. Now I didn't try to > load any gadgets to make sure USB gadget works 100%, but since you > were saying it was USB host that was broken, I wasn't concerned with > that. Am I doing the right test? Hmm... What you described above sounds more like a yet another attempt to workaround (1). _If_ this is the case, we probably can discuss how to fix it in generic way (somewhere in dd.c, rather than in the certain driver). That said, the real test case should be performed on top of clean kernel before judging if it's good or bad. > For the reference what I test with is: > - vanilla kernel, no patch delta (sans minor debug tracing) + initrd > built with Buildroot 2022.08.1 > - Initrd is using systemd (don't think that really matters, but who knows) > - U-Boot 2022.04 (built with Buildroot as well) > - kernel config is x86_64_defconfig + whatever I gathered from *.cfg > files in https://github.com/edison-fw/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files
Hi On 10-10-2022 07:02, Andrey Smirnov wrote: > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: >> >> On 07-10-2022 04:11, Thinh Nguyen wrote: >>> On Thu, Oct 06, 2022, Ferry Toth wrote: >>>> Hi >>>> >>>> On 06-10-2022 04:12, Thinh Nguyen wrote: >>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: >>>>>> Hi, >>>>>> >>>>>> Thanks! >>>>>> >>>>>> Does the failure only happen the first time host is initialized? Or can >>>>>> it recover after switching to device then back to host mode? >>>>>> >>>>>> I can switch back and forth and device mode works each time, host mode remains >>>>>> dead. >>>>> Ok. >>>>> >>>>>> Probably the failure happens if some step(s) in dwc3_core_init() hasn't >>>>>> completed. >>>>>> >>>>>> tusb1210 is a phy driver right? The issue is probably because we didn't >>>>>> initialize the phy yet. So, I suspect placing dwc3_get_extcon() after >>>>>> initializing the phy will probably solve the dependency problem. >>>>>> >>>>>> You can try something for yourself or I can provide something to test >>>>>> later if you don't mind (maybe next week if it's ok). >>>>>> >>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() until after >>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU initially >>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() but only for >>>>>> case USB_DR_MODE_OTG. So with this change order of events is more or less >>>>>> unchanged" solves the issue. >>>>>> >>>>> I saw the experiment you did from the link you provided. We want to also >>>>> confirm exactly which step in dwc3_core_init() was needed. >>>> Ok. I first tried the code move suggested by Andrey (didn't work). Then >>>> after reading the actual code I moved a bit further. >>>> >>>> This move was on top of -rc6 without any reverts. I did not make additional >>>> changes to dwc3_core_init() >>>> >>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>> >>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - .. >>>> - dwc3_core_init_mode (no change) >>>> >>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - >>>> dwc3_core_init_mode (works) >>>> >>>> .. are what I believe for this issue irrelevant calls to >>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. >>>> >>> Right. Thanks for narrowing it down. There are still many steps in >>> dwc3_core_init(). We have some suspicion, but we still haven't confirmed >>> the exact cause of the failure. We can write a proper patch once we know >>> the reason. >> If you would like me to test your suspicion, just tell me what to do :-) > > OK, Ferry, I think I'm going to need clarification on specifics on > your test setup. Can you share your kernel config, maybe your > "/proc/config.gz", somewhere? When you say you are running vanilla > Linux, do you mean it or do you mean vanilla tree + some patch delta? For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb There are 2 patches referred in #67 and #68. One is related to the infinite loop. The other is I believe also needed to get dwc3 to work. All the kernel config are applied as .cfg. Patches and cfs's here: https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files > The reason I'm asking is because I'm having a hard time reproducing > the problem on my end. In fact, when I build v6.0 > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > I get an infinite loop of reprobing that looks something like (some > debug tracing, function name + line number, included): > > [ 6.160732] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 > to reg 0x80 > [ 6.172299] XXXXXXXXXXX: dwc3_probe 1834 > [ 6.172426] XXXXXXXXXXX: dwc3_core_init_mode 1386 > [ 6.176391] XXXXXXXXXXX: dwc3_drd_init 593 > [ 6.181573] dwc3 dwc3.0.auto: Driver dwc3 requests probe deferral > [ 6.191886] platform dwc3.0.auto: Added to deferred list > [ 6.197249] platform dwc3.0.auto: Retrying from deferred list > [ 6.203057] bus: 'platform': __driver_probe_device: matched device > dwc3.0.auto with driver dwc3 > [ 6.211783] bus: 'platform': really_probe: probing driver dwc3 with > device dwc3.0.auto > [ 6.219935] XXXXXXXXXXX: dwc3_probe 1822 > [ 6.219952] XXXXXXXXXXX: dwc3_core_init 1092 > [ 6.223903] XXXXXXXXXXX: dwc3_core_init 1095 > [ 6.234839] bus: 'ulpi': __driver_probe_device: matched device > dwc3.0.auto.ulpi with driver tusb1210 > [ 6.248335] bus: 'ulpi': really_probe: probing driver tusb1210 with > device dwc3.0.auto.ulpi > [ 6.257039] driver: 'tusb1210': driver_bound: bound to device > 'dwc3.0.auto.ulpi' > [ 6.264501] bus: 'ulpi': really_probe: bound device > dwc3.0.auto.ulpi to driver tusb1210 > [ 6.272553] debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' > already present! > [ 6.279978] XXXXXXXXXXX: dwc3_core_init 1099 > [ 6.279991] XXXXXXXXXXX: dwc3_core_init 1103 > [ 6.345769] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 > to reg 0x80 > [ 6.357316] XXXXXXXXXXX: dwc3_probe 1834 > [ 6.357447] XXXXXXXXXXX: dwc3_core_init_mode 1386 > [ 6.361402] XXXXXXXXXXX: dwc3_drd_init 593 > [ 6.366589] dwc3 dwc3.0.auto: Driver dwc3 requests probe deferral > [ 6.376901] platform dwc3.0.auto: Added to deferred list > > which renders the system completely unusable, but USB host is > definitely going to be broken too. Now, ironically, with my patch > in-place, an attempt to probe extcon that ends up deferring the probe > happens before the ULPI driver failure (which wasn't failing driver > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > there no "driver binding" event that re-triggers deferred probe > causing the loop, so the system progresses to a point where extcon is > available and dwc3 driver eventually loads. > > After that, and I don't know if I'm doing the same test, USB host > seems to work as expected. lsusb works, my USB stick enumerates as > expected. Switching the USB mux to micro-USB and back shuts the host > functionality down and brings it up as expected. Now I didn't try to > load any gadgets to make sure USB gadget works 100%, but since you > were saying it was USB host that was broken, I wasn't concerned with > that. Am I doing the right test? > > For the reference what I test with is: > - vanilla kernel, no patch delta (sans minor debug tracing) + initrd > built with Buildroot 2022.08.1 > - Initrd is using systemd (don't think that really matters, but who knows) > - U-Boot 2022.04 (built with Buildroot as well) > - kernel config is x86_64_defconfig + whatever I gathered from *.cfg > files in https://github.com/edison-fw/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files
Hi Op 10-10-2022 om 13:04 schreef Ferry Toth: > Hi > > On 10-10-2022 07:02, Andrey Smirnov wrote: >> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: >>> >>> On 07-10-2022 04:11, Thinh Nguyen wrote: >>>> On Thu, Oct 06, 2022, Ferry Toth wrote: >>>>> Hi >>>>> >>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: >>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: >>>>>>> Hi, >>>>>>> >>>>>>> Thanks! >>>>>>> >>>>>>> Does the failure only happen the first time host is >>>>>>> initialized? Or can >>>>>>> it recover after switching to device then back to host mode? >>>>>>> >>>>>>> I can switch back and forth and device mode works each time, >>>>>>> host mode remains >>>>>>> dead. >>>>>> Ok. >>>>>> >>>>>>> Probably the failure happens if some step(s) in >>>>>>> dwc3_core_init() hasn't >>>>>>> completed. >>>>>>> >>>>>>> tusb1210 is a phy driver right? The issue is probably >>>>>>> because we didn't >>>>>>> initialize the phy yet. So, I suspect placing >>>>>>> dwc3_get_extcon() after >>>>>>> initializing the phy will probably solve the dependency >>>>>>> problem. >>>>>>> >>>>>>> You can try something for yourself or I can provide >>>>>>> something to test >>>>>>> later if you don't mind (maybe next week if it's ok). >>>>>>> >>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() >>>>>>> until after >>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU >>>>>>> initially >>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() >>>>>>> but only for >>>>>>> case USB_DR_MODE_OTG. So with this change order of events is >>>>>>> more or less >>>>>>> unchanged" solves the issue. >>>>>>> >>>>>> I saw the experiment you did from the link you provided. We want >>>>>> to also >>>>>> confirm exactly which step in dwc3_core_init() was needed. >>>>> Ok. I first tried the code move suggested by Andrey (didn't work). >>>>> Then >>>>> after reading the actual code I moved a bit further. >>>>> >>>>> This move was on top of -rc6 without any reverts. I did not make >>>>> additional >>>>> changes to dwc3_core_init() >>>>> >>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>>> >>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - >>>>> dwc3_core_init - .. >>>>> - dwc3_core_init_mode (no change) >>>>> >>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - >>>>> dwc3_get_extcon - >>>>> dwc3_core_init_mode (works) >>>>> >>>>> .. are what I believe for this issue irrelevant calls to >>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. >>>>> >>>> Right. Thanks for narrowing it down. There are still many steps in >>>> dwc3_core_init(). We have some suspicion, but we still haven't >>>> confirmed >>>> the exact cause of the failure. We can write a proper patch once we >>>> know >>>> the reason. >>> If you would like me to test your suspicion, just tell me what to do >>> :-) >> >> OK, Ferry, I think I'm going to need clarification on specifics on >> your test setup. Can you share your kernel config, maybe your >> "/proc/config.gz", somewhere? When you say you are running vanilla >> Linux, do you mean it or do you mean vanilla tree + some patch delta? > > For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: > > https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb > > > There are 2 patches referred in #67 and #68. One is related to the > infinite loop. The other is I believe also needed to get dwc3 to work. > > All the kernel config are applied as .cfg. > > Patches and cfs's here: > > https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files > Updated Yocto recipe for v6.0 here: https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to this thread). Otherwise via the git route, https://github.com/andy-shev/linux should lead to the same, although you might want to drop "WIP: serial: 8250_dma: use sgl on transmit " > >> The reason I'm asking is because I'm having a hard time reproducing >> the problem on my end. In fact, when I build v6.0 >> (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a >> >> git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) >> >> I get an infinite loop of reprobing that looks something like (some >> debug tracing, function name + line number, included): >> >> [ 6.160732] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 >> to reg 0x80 >> [ 6.172299] XXXXXXXXXXX: dwc3_probe 1834 >> [ 6.172426] XXXXXXXXXXX: dwc3_core_init_mode 1386 >> [ 6.176391] XXXXXXXXXXX: dwc3_drd_init 593 >> [ 6.181573] dwc3 dwc3.0.auto: Driver dwc3 requests probe deferral >> [ 6.191886] platform dwc3.0.auto: Added to deferred list >> [ 6.197249] platform dwc3.0.auto: Retrying from deferred list >> [ 6.203057] bus: 'platform': __driver_probe_device: matched device >> dwc3.0.auto with driver dwc3 >> [ 6.211783] bus: 'platform': really_probe: probing driver dwc3 with >> device dwc3.0.auto >> [ 6.219935] XXXXXXXXXXX: dwc3_probe 1822 >> [ 6.219952] XXXXXXXXXXX: dwc3_core_init 1092 >> [ 6.223903] XXXXXXXXXXX: dwc3_core_init 1095 >> [ 6.234839] bus: 'ulpi': __driver_probe_device: matched device >> dwc3.0.auto.ulpi with driver tusb1210 >> [ 6.248335] bus: 'ulpi': really_probe: probing driver tusb1210 with >> device dwc3.0.auto.ulpi >> [ 6.257039] driver: 'tusb1210': driver_bound: bound to device >> 'dwc3.0.auto.ulpi' >> [ 6.264501] bus: 'ulpi': really_probe: bound device >> dwc3.0.auto.ulpi to driver tusb1210 >> [ 6.272553] debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' >> already present! >> [ 6.279978] XXXXXXXXXXX: dwc3_core_init 1099 >> [ 6.279991] XXXXXXXXXXX: dwc3_core_init 1103 >> [ 6.345769] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 >> to reg 0x80 >> [ 6.357316] XXXXXXXXXXX: dwc3_probe 1834 >> [ 6.357447] XXXXXXXXXXX: dwc3_core_init_mode 1386 >> [ 6.361402] XXXXXXXXXXX: dwc3_drd_init 593 >> [ 6.366589] dwc3 dwc3.0.auto: Driver dwc3 requests probe deferral >> [ 6.376901] platform dwc3.0.auto: Added to deferred list >> >> which renders the system completely unusable, but USB host is >> definitely going to be broken too. Now, ironically, with my patch >> in-place, an attempt to probe extcon that ends up deferring the probe >> happens before the ULPI driver failure (which wasn't failing driver >> probe prior to >> https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), >> there no "driver binding" event that re-triggers deferred probe >> causing the loop, so the system progresses to a point where extcon is >> available and dwc3 driver eventually loads. >> >> After that, and I don't know if I'm doing the same test, USB host >> seems to work as expected. lsusb works, my USB stick enumerates as >> expected. Switching the USB mux to micro-USB and back shuts the host >> functionality down and brings it up as expected. Now I didn't try to >> load any gadgets to make sure USB gadget works 100%, but since you >> were saying it was USB host that was broken, I wasn't concerned with >> that. Am I doing the right test? >> >> For the reference what I test with is: >> - vanilla kernel, no patch delta (sans minor debug tracing) + initrd >> built with Buildroot 2022.08.1 >> - Initrd is using systemd (don't think that really matters, but who >> knows) >> - U-Boot 2022.04 (built with Buildroot as well) >> - kernel config is x86_64_defconfig + whatever I gathered from *.cfg >> files in >> https://github.com/edison-fw/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files
On Mon, Oct 10, 2022 at 1:52 PM Ferry Toth <fntoth@gmail.com> wrote: > > Hi > > Op 10-10-2022 om 13:04 schreef Ferry Toth: > > Hi > > > > On 10-10-2022 07:02, Andrey Smirnov wrote: > >> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > >>> > >>> On 07-10-2022 04:11, Thinh Nguyen wrote: > >>>> On Thu, Oct 06, 2022, Ferry Toth wrote: > >>>>> Hi > >>>>> > >>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: > >>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: > >>>>>>> Hi, > >>>>>>> > >>>>>>> Thanks! > >>>>>>> > >>>>>>> Does the failure only happen the first time host is > >>>>>>> initialized? Or can > >>>>>>> it recover after switching to device then back to host mode? > >>>>>>> > >>>>>>> I can switch back and forth and device mode works each time, > >>>>>>> host mode remains > >>>>>>> dead. > >>>>>> Ok. > >>>>>> > >>>>>>> Probably the failure happens if some step(s) in > >>>>>>> dwc3_core_init() hasn't > >>>>>>> completed. > >>>>>>> > >>>>>>> tusb1210 is a phy driver right? The issue is probably > >>>>>>> because we didn't > >>>>>>> initialize the phy yet. So, I suspect placing > >>>>>>> dwc3_get_extcon() after > >>>>>>> initializing the phy will probably solve the dependency > >>>>>>> problem. > >>>>>>> > >>>>>>> You can try something for yourself or I can provide > >>>>>>> something to test > >>>>>>> later if you don't mind (maybe next week if it's ok). > >>>>>>> > >>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() > >>>>>>> until after > >>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU > >>>>>>> initially > >>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() > >>>>>>> but only for > >>>>>>> case USB_DR_MODE_OTG. So with this change order of events is > >>>>>>> more or less > >>>>>>> unchanged" solves the issue. > >>>>>>> > >>>>>> I saw the experiment you did from the link you provided. We want > >>>>>> to also > >>>>>> confirm exactly which step in dwc3_core_init() was needed. > >>>>> Ok. I first tried the code move suggested by Andrey (didn't work). > >>>>> Then > >>>>> after reading the actual code I moved a bit further. > >>>>> > >>>>> This move was on top of -rc6 without any reverts. I did not make > >>>>> additional > >>>>> changes to dwc3_core_init() > >>>>> > >>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > >>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) > >>>>> > >>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - > >>>>> dwc3_core_init - .. > >>>>> - dwc3_core_init_mode (no change) > >>>>> > >>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - > >>>>> dwc3_get_extcon - > >>>>> dwc3_core_init_mode (works) > >>>>> > >>>>> .. are what I believe for this issue irrelevant calls to > >>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. > >>>>> > >>>> Right. Thanks for narrowing it down. There are still many steps in > >>>> dwc3_core_init(). We have some suspicion, but we still haven't > >>>> confirmed > >>>> the exact cause of the failure. We can write a proper patch once we > >>>> know > >>>> the reason. > >>> If you would like me to test your suspicion, just tell me what to do > >>> :-) > >> > >> OK, Ferry, I think I'm going to need clarification on specifics on > >> your test setup. Can you share your kernel config, maybe your > >> "/proc/config.gz", somewhere? When you say you are running vanilla > >> Linux, do you mean it or do you mean vanilla tree + some patch delta? > > > > For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: > > > > https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb > > > > > > There are 2 patches referred in #67 and #68. One is related to the > > infinite loop. The other is I believe also needed to get dwc3 to work. > > > > All the kernel config are applied as .cfg. > > > > Patches and cfs's here: > > > > https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files > > > > Updated Yocto recipe for v6.0 here: > > https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb > > #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to > this thread). Please drop all of this https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb#L69-L77 and re do the testing. Assuming things are still broken, that's how you want to do the bisecting.
On Mon, Oct 10, 2022 at 12:13 AM Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: > > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > > Thank you for the testing on your side! > > ... > > > OK, Ferry, I think I'm going to need clarification on specifics on > > your test setup. Can you share your kernel config, maybe your > > "/proc/config.gz", somewhere? When you say you are running vanilla > > Linux, do you mean it or do you mean vanilla tree + some patch delta? > > > > The reason I'm asking is because I'm having a hard time reproducing > > the problem on my end. In fact, when I build v6.0 > > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > > > I get an infinite loop of reprobing that looks something like (some > > debug tracing, function name + line number, included): > > Yes, this is (one of) known drawback(s) of deferred probe hack. I think > the kernel that Ferry runs has a patch that basically reverts one from > 2014 [1] and allows to have extcon as a module. (1) > > [1]: 58b116bce136 ("drivercore: deferral race condition fix") > > > which renders the system completely unusable, but USB host is > > definitely going to be broken too. Now, ironically, with my patch > > in-place, an attempt to probe extcon that ends up deferring the probe > > happens before the ULPI driver failure (which wasn't failing driver > > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > > there no "driver binding" event that re-triggers deferred probe > > causing the loop, so the system progresses to a point where extcon is > > available and dwc3 driver eventually loads. > > > > After that, and I don't know if I'm doing the same test, USB host > > seems to work as expected. lsusb works, my USB stick enumerates as > > expected. Switching the USB mux to micro-USB and back shuts the host > > functionality down and brings it up as expected. Now I didn't try to > > load any gadgets to make sure USB gadget works 100%, but since you > > were saying it was USB host that was broken, I wasn't concerned with > > that. Am I doing the right test? > > Hmm... What you described above sounds more like a yet another attempt to > workaround (1). _If_ this is the case, we probably can discuss how to fix > it in generic way (somewhere in dd.c, rather than in the certain driver). > No, I'm not describing an attempt to fix anything. Just how vanilla v6.0 (where my patch is not reverted) works and where my patch, fixing a logical problem in which extcon was requested too late causing a forced OTG -> "gadget only" switch, also changed the ordering enough to accidentally avoid the loop. > That said, the real test case should be performed on top of clean kernel > before judging if it's good or bad. > Given your level of involvemnt with this particular platform and you being the author of https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch I assumed/expected you to double check this before sending this revert out. Please do so next time.
On Mon, Oct 10, 2022 at 02:40:30PM -0700, Andrey Smirnov wrote: > On Mon, Oct 10, 2022 at 12:13 AM Andy Shevchenko > <andriy.shevchenko@linux.intel.com> wrote: > > On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: > > > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: ... > > > OK, Ferry, I think I'm going to need clarification on specifics on > > > your test setup. Can you share your kernel config, maybe your > > > "/proc/config.gz", somewhere? When you say you are running vanilla > > > Linux, do you mean it or do you mean vanilla tree + some patch delta? > > > > > > The reason I'm asking is because I'm having a hard time reproducing > > > the problem on my end. In fact, when I build v6.0 > > > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > > > > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > > > > > I get an infinite loop of reprobing that looks something like (some > > > debug tracing, function name + line number, included): > > > > Yes, this is (one of) known drawback(s) of deferred probe hack. I think > > the kernel that Ferry runs has a patch that basically reverts one from > > 2014 [1] and allows to have extcon as a module. (1) > > > > [1]: 58b116bce136 ("drivercore: deferral race condition fix") > > > > > which renders the system completely unusable, but USB host is > > > definitely going to be broken too. Now, ironically, with my patch > > > in-place, an attempt to probe extcon that ends up deferring the probe > > > happens before the ULPI driver failure (which wasn't failing driver > > > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > > > there no "driver binding" event that re-triggers deferred probe > > > causing the loop, so the system progresses to a point where extcon is > > > available and dwc3 driver eventually loads. > > > > > > After that, and I don't know if I'm doing the same test, USB host > > > seems to work as expected. lsusb works, my USB stick enumerates as > > > expected. Switching the USB mux to micro-USB and back shuts the host > > > functionality down and brings it up as expected. Now I didn't try to > > > load any gadgets to make sure USB gadget works 100%, but since you > > > were saying it was USB host that was broken, I wasn't concerned with > > > that. Am I doing the right test? > > > > Hmm... What you described above sounds more like a yet another attempt to > > workaround (1). _If_ this is the case, we probably can discuss how to fix > > it in generic way (somewhere in dd.c, rather than in the certain driver). > > No, I'm not describing an attempt to fix anything. Just how vanilla > v6.0 (where my patch is not reverted) works and where my patch, fixing > a logical problem in which extcon was requested too late causing a > forced OTG -> "gadget only" switch, also changed the ordering enough > to accidentally avoid the loop. You still refer to a fix, but my question was if it's the same problem or not. > > That said, the real test case should be performed on top of clean kernel > > before judging if it's good or bad. > > Given your level of involvemnt with this particular platform and you > being the author of > https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > I assumed/expected you to double check this before sending this revert > out. Please do so next time. As I said I have not yet restored my testing environment for that platform and I relied on the Ferry's report. Taking into account the history of breakages that done for Intel Merrifield, in particular by not wide tested patches against DWC3 driver, I immediately react with a revert. I agree that I had had to think about that ordering issue and a patch on top of it first. I thought that Yocto configuration that Ferry is using is clean of custom (controversial) patches like that. Now, since you have this platform, you can also help with testing the DWC3 on it.
Hi On 11-10-2022 11:21, Andy Shevchenko wrote: > On Mon, Oct 10, 2022 at 02:40:30PM -0700, Andrey Smirnov wrote: >> On Mon, Oct 10, 2022 at 12:13 AM Andy Shevchenko >> <andriy.shevchenko@linux.intel.com> wrote: >>> On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: >>>> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth<fntoth@gmail.com> wrote: > ... > >>>> OK, Ferry, I think I'm going to need clarification on specifics on >>>> your test setup. Can you share your kernel config, maybe your >>>> "/proc/config.gz", somewhere? When you say you are running vanilla >>>> Linux, do you mean it or do you mean vanilla tree + some patch delta? >>>> >>>> The reason I'm asking is because I'm having a hard time reproducing >>>> the problem on my end. In fact, when I build v6.0 >>>> (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a >>>> >>>> git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) >>>> >>>> I get an infinite loop of reprobing that looks something like (some >>>> debug tracing, function name + line number, included): >>> Yes, this is (one of) known drawback(s) of deferred probe hack. I think >>> the kernel that Ferry runs has a patch that basically reverts one from >>> 2014 [1] and allows to have extcon as a module. (1) >>> >>> [1]: 58b116bce136 ("drivercore: deferral race condition fix") >>> >>>> which renders the system completely unusable, but USB host is >>>> definitely going to be broken too. Now, ironically, with my patch >>>> in-place, an attempt to probe extcon that ends up deferring the probe >>>> happens before the ULPI driver failure (which wasn't failing driver >>>> probe prior tohttps://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), >>>> there no "driver binding" event that re-triggers deferred probe >>>> causing the loop, so the system progresses to a point where extcon is >>>> available and dwc3 driver eventually loads. >>>> >>>> After that, and I don't know if I'm doing the same test, USB host >>>> seems to work as expected. lsusb works, my USB stick enumerates as >>>> expected. Switching the USB mux to micro-USB and back shuts the host >>>> functionality down and brings it up as expected. Now I didn't try to >>>> load any gadgets to make sure USB gadget works 100%, but since you >>>> were saying it was USB host that was broken, I wasn't concerned with >>>> that. Am I doing the right test? >>> Hmm... What you described above sounds more like a yet another attempt to >>> workaround (1). _If_ this is the case, we probably can discuss how to fix >>> it in generic way (somewhere in dd.c, rather than in the certain driver). >> No, I'm not describing an attempt to fix anything. Just how vanilla >> v6.0 (where my patch is not reverted) works and where my patch, fixing >> a logical problem in which extcon was requested too late causing a >> forced OTG -> "gadget only" switch, also changed the ordering enough >> to accidentally avoid the loop. > You still refer to a fix, but my question was if it's the same problem or not. > >>> That said, the real test case should be performed on top of clean kernel >>> before judging if it's good or bad. >> Given your level of involvemnt with this particular platform and you >> being the author of >> https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch >> I assumed/expected you to double check this before sending this revert >> out. Please do so next time. > As I said I have not yet restored my testing environment for that platform and > I relied on the Ferry's report. Taking into account the history of breakages > that done for Intel Merrifield, in particular by not wide tested patches > against DWC3 driver, I immediately react with a revert. I agree that I had had > to think about that ordering issue and a patch on top of it first. I thought > that Yocto configuration that Ferry is using is clean of custom (controversial) > patches like that. Now, since you have this platform, you can also help with > testing the DWC3 on it. > It's my fault I'm afraid. My apologies. We have been needing the "Break infinite loop" patch at least since v5.10. Without IIRC we can not even boot normally or at least we have no dwc3. No way to bisect. In my mind I didn't link that patch to this issue. Other patches were indeed removed during bisecting. But it is interesting that we should be able to drop that patch for v6.0, even if that is a side effect. I will try to reproduce and report back here.
Hi, Op 10-10-2022 om 23:35 schreef Andrey Smirnov: > On Mon, Oct 10, 2022 at 1:52 PM Ferry Toth <fntoth@gmail.com> wrote: >> >> Hi >> >> Op 10-10-2022 om 13:04 schreef Ferry Toth: >>> Hi >>> >>> On 10-10-2022 07:02, Andrey Smirnov wrote: >>>> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: >>>>> >>>>> On 07-10-2022 04:11, Thinh Nguyen wrote: >>>>>> On Thu, Oct 06, 2022, Ferry Toth wrote: >>>>>>> Hi >>>>>>> >>>>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: >>>>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: >>>>>>>>> Hi, >>>>>>>>> >>>>>>>>> Thanks! >>>>>>>>> >>>>>>>>> Does the failure only happen the first time host is >>>>>>>>> initialized? Or can >>>>>>>>> it recover after switching to device then back to host mode? >>>>>>>>> >>>>>>>>> I can switch back and forth and device mode works each time, >>>>>>>>> host mode remains >>>>>>>>> dead. >>>>>>>> Ok. >>>>>>>> >>>>>>>>> Probably the failure happens if some step(s) in >>>>>>>>> dwc3_core_init() hasn't >>>>>>>>> completed. >>>>>>>>> >>>>>>>>> tusb1210 is a phy driver right? The issue is probably >>>>>>>>> because we didn't >>>>>>>>> initialize the phy yet. So, I suspect placing >>>>>>>>> dwc3_get_extcon() after >>>>>>>>> initializing the phy will probably solve the dependency >>>>>>>>> problem. >>>>>>>>> >>>>>>>>> You can try something for yourself or I can provide >>>>>>>>> something to test >>>>>>>>> later if you don't mind (maybe next week if it's ok). >>>>>>>>> >>>>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() >>>>>>>>> until after >>>>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU >>>>>>>>> initially >>>>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() >>>>>>>>> but only for >>>>>>>>> case USB_DR_MODE_OTG. So with this change order of events is >>>>>>>>> more or less >>>>>>>>> unchanged" solves the issue. >>>>>>>>> >>>>>>>> I saw the experiment you did from the link you provided. We want >>>>>>>> to also >>>>>>>> confirm exactly which step in dwc3_core_init() was needed. >>>>>>> Ok. I first tried the code move suggested by Andrey (didn't work). >>>>>>> Then >>>>>>> after reading the actual code I moved a bit further. >>>>>>> >>>>>>> This move was on top of -rc6 without any reverts. I did not make >>>>>>> additional >>>>>>> changes to dwc3_core_init() >>>>>>> >>>>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>>>>> >>>>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - >>>>>>> dwc3_core_init - .. >>>>>>> - dwc3_core_init_mode (no change) >>>>>>> >>>>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - >>>>>>> dwc3_get_extcon - >>>>>>> dwc3_core_init_mode (works) >>>>>>> >>>>>>> .. are what I believe for this issue irrelevant calls to >>>>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. >>>>>>> >>>>>> Right. Thanks for narrowing it down. There are still many steps in >>>>>> dwc3_core_init(). We have some suspicion, but we still haven't >>>>>> confirmed >>>>>> the exact cause of the failure. We can write a proper patch once we >>>>>> know >>>>>> the reason. >>>>> If you would like me to test your suspicion, just tell me what to do >>>>> :-) >>>> >>>> OK, Ferry, I think I'm going to need clarification on specifics on >>>> your test setup. Can you share your kernel config, maybe your >>>> "/proc/config.gz", somewhere? When you say you are running vanilla >>>> Linux, do you mean it or do you mean vanilla tree + some patch delta? >>> >>> For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: >>> >>> https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb >>> >>> >>> There are 2 patches referred in #67 and #68. One is related to the >>> infinite loop. The other is I believe also needed to get dwc3 to work. >>> >>> All the kernel config are applied as .cfg. >>> >>> Patches and cfs's here: >>> >>> https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files >>> >> >> Updated Yocto recipe for v6.0 here: >> >> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb >> >> #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to >> this thread). > > Please drop all of this > https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb#L69-L77 > and re do the testing. Assuming things are still broken, that's how > you want to do the bisecting. I removed 4 patches: 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch 0044-REVERTME-usb-dwc3-gadget-skip-endpoints-ep-18-in-out.patch 0001-Revert-USB-fixup-for-merge-issue-with-usb-dwc3-Don-t.patch 0001-Revert-usb-dwc3-Don-t-switch-OTG-peripheral-if-extco.patch and indeed as you expect kernel boots (no infinite loop). However dwc3 host mode is not working as in your case, device mode works fine (Yocto configures a set of gadgets for me). Just to be sure if I could have bisected without 0043a I added back the 2 0001-Revert* and indeed I run into the infinite loop with the console spitting out continuous: debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' already present! tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 so yes it seems either 0043b or your patch "usb: dwc3: Don't switch OTG -> peripheral if extcon is present" is needed to boot (break the infinite loop). But your patch is in my case not sufficient to make host mode work. As I understand it depends a bit on the timing, I might have a different initrd (built by Yocto vs. Buildroot). F.i. I see I have extcon-intel-mrfld in initrd and dwc3 / phy-tusb1210 built-in.
On Tue, Oct 11, 2022 at 2:21 AM Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > On Mon, Oct 10, 2022 at 02:40:30PM -0700, Andrey Smirnov wrote: > > On Mon, Oct 10, 2022 at 12:13 AM Andy Shevchenko > > <andriy.shevchenko@linux.intel.com> wrote: > > > On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: > > > > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > > ... > > > > > OK, Ferry, I think I'm going to need clarification on specifics on > > > > your test setup. Can you share your kernel config, maybe your > > > > "/proc/config.gz", somewhere? When you say you are running vanilla > > > > Linux, do you mean it or do you mean vanilla tree + some patch delta? > > > > > > > > The reason I'm asking is because I'm having a hard time reproducing > > > > the problem on my end. In fact, when I build v6.0 > > > > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > > > > > > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > > > > > > > I get an infinite loop of reprobing that looks something like (some > > > > debug tracing, function name + line number, included): > > > > > > Yes, this is (one of) known drawback(s) of deferred probe hack. I think > > > the kernel that Ferry runs has a patch that basically reverts one from > > > 2014 [1] and allows to have extcon as a module. (1) > > > > > > [1]: 58b116bce136 ("drivercore: deferral race condition fix") > > > > > > > which renders the system completely unusable, but USB host is > > > > definitely going to be broken too. Now, ironically, with my patch > > > > in-place, an attempt to probe extcon that ends up deferring the probe > > > > happens before the ULPI driver failure (which wasn't failing driver > > > > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > > > > there no "driver binding" event that re-triggers deferred probe > > > > causing the loop, so the system progresses to a point where extcon is > > > > available and dwc3 driver eventually loads. > > > > > > > > After that, and I don't know if I'm doing the same test, USB host > > > > seems to work as expected. lsusb works, my USB stick enumerates as > > > > expected. Switching the USB mux to micro-USB and back shuts the host > > > > functionality down and brings it up as expected. Now I didn't try to > > > > load any gadgets to make sure USB gadget works 100%, but since you > > > > were saying it was USB host that was broken, I wasn't concerned with > > > > that. Am I doing the right test? > > > > > > Hmm... What you described above sounds more like a yet another attempt to > > > workaround (1). _If_ this is the case, we probably can discuss how to fix > > > it in generic way (somewhere in dd.c, rather than in the certain driver). > > > > No, I'm not describing an attempt to fix anything. Just how vanilla > > v6.0 (where my patch is not reverted) works and where my patch, fixing > > a logical problem in which extcon was requested too late causing a > > forced OTG -> "gadget only" switch, also changed the ordering enough > > to accidentally avoid the loop. > > You still refer to a fix, but my question was if it's the same problem or not. > No, it's not the same problem. > > > That said, the real test case should be performed on top of clean kernel > > > before judging if it's good or bad. > > > > Given your level of involvemnt with this particular platform and you > > being the author of > > https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > > I assumed/expected you to double check this before sending this revert > > out. Please do so next time. > > As I said I have not yet restored my testing environment for that platform and > I relied on the Ferry's report. Taking into account the history of breakages > that done for Intel Merrifield, in particular by not wide tested patches > against DWC3 driver, I immediately react with a revert. That's what I'm asking you not to do next time. If you don't have time to restore your testing env or double check Ferry's work, please live with a revert in your local tree until you do. My time is as valuable as yours and this revert required much more investigation before it was submitted. You lived with https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch since 5.10, which apparently was needed to either boot or have dwc3, so I don't think there is any real urgency.
On Tue, Oct 11, 2022 at 11:54 AM Ferry Toth <fntoth@gmail.com> wrote: > > Hi, > > Op 10-10-2022 om 23:35 schreef Andrey Smirnov: > > On Mon, Oct 10, 2022 at 1:52 PM Ferry Toth <fntoth@gmail.com> wrote: > >> > >> Hi > >> > >> Op 10-10-2022 om 13:04 schreef Ferry Toth: > >>> Hi > >>> > >>> On 10-10-2022 07:02, Andrey Smirnov wrote: > >>>> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > >>>>> > >>>>> On 07-10-2022 04:11, Thinh Nguyen wrote: > >>>>>> On Thu, Oct 06, 2022, Ferry Toth wrote: > >>>>>>> Hi > >>>>>>> > >>>>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: > >>>>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: > >>>>>>>>> Hi, > >>>>>>>>> > >>>>>>>>> Thanks! > >>>>>>>>> > >>>>>>>>> Does the failure only happen the first time host is > >>>>>>>>> initialized? Or can > >>>>>>>>> it recover after switching to device then back to host mode? > >>>>>>>>> > >>>>>>>>> I can switch back and forth and device mode works each time, > >>>>>>>>> host mode remains > >>>>>>>>> dead. > >>>>>>>> Ok. > >>>>>>>> > >>>>>>>>> Probably the failure happens if some step(s) in > >>>>>>>>> dwc3_core_init() hasn't > >>>>>>>>> completed. > >>>>>>>>> > >>>>>>>>> tusb1210 is a phy driver right? The issue is probably > >>>>>>>>> because we didn't > >>>>>>>>> initialize the phy yet. So, I suspect placing > >>>>>>>>> dwc3_get_extcon() after > >>>>>>>>> initializing the phy will probably solve the dependency > >>>>>>>>> problem. > >>>>>>>>> > >>>>>>>>> You can try something for yourself or I can provide > >>>>>>>>> something to test > >>>>>>>>> later if you don't mind (maybe next week if it's ok). > >>>>>>>>> > >>>>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() > >>>>>>>>> until after > >>>>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU > >>>>>>>>> initially > >>>>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() > >>>>>>>>> but only for > >>>>>>>>> case USB_DR_MODE_OTG. So with this change order of events is > >>>>>>>>> more or less > >>>>>>>>> unchanged" solves the issue. > >>>>>>>>> > >>>>>>>> I saw the experiment you did from the link you provided. We want > >>>>>>>> to also > >>>>>>>> confirm exactly which step in dwc3_core_init() was needed. > >>>>>>> Ok. I first tried the code move suggested by Andrey (didn't work). > >>>>>>> Then > >>>>>>> after reading the actual code I moved a bit further. > >>>>>>> > >>>>>>> This move was on top of -rc6 without any reverts. I did not make > >>>>>>> additional > >>>>>>> changes to dwc3_core_init() > >>>>>>> > >>>>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > >>>>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) > >>>>>>> > >>>>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - > >>>>>>> dwc3_core_init - .. > >>>>>>> - dwc3_core_init_mode (no change) > >>>>>>> > >>>>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - > >>>>>>> dwc3_get_extcon - > >>>>>>> dwc3_core_init_mode (works) > >>>>>>> > >>>>>>> .. are what I believe for this issue irrelevant calls to > >>>>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. > >>>>>>> > >>>>>> Right. Thanks for narrowing it down. There are still many steps in > >>>>>> dwc3_core_init(). We have some suspicion, but we still haven't > >>>>>> confirmed > >>>>>> the exact cause of the failure. We can write a proper patch once we > >>>>>> know > >>>>>> the reason. > >>>>> If you would like me to test your suspicion, just tell me what to do > >>>>> :-) > >>>> > >>>> OK, Ferry, I think I'm going to need clarification on specifics on > >>>> your test setup. Can you share your kernel config, maybe your > >>>> "/proc/config.gz", somewhere? When you say you are running vanilla > >>>> Linux, do you mean it or do you mean vanilla tree + some patch delta? > >>> > >>> For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: > >>> > >>> https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb > >>> > >>> > >>> There are 2 patches referred in #67 and #68. One is related to the > >>> infinite loop. The other is I believe also needed to get dwc3 to work. > >>> > >>> All the kernel config are applied as .cfg. > >>> > >>> Patches and cfs's here: > >>> > >>> https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files > >>> > >> > >> Updated Yocto recipe for v6.0 here: > >> > >> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb > >> > >> #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to > >> this thread). > > > > Please drop all of this > > https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb#L69-L77 > > and re do the testing. Assuming things are still broken, that's how > > you want to do the bisecting. > > I removed 4 patches: > 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > 0044-REVERTME-usb-dwc3-gadget-skip-endpoints-ep-18-in-out.patch > 0001-Revert-USB-fixup-for-merge-issue-with-usb-dwc3-Don-t.patch > 0001-Revert-usb-dwc3-Don-t-switch-OTG-peripheral-if-extco.patch Please remove all custom patches so we are on the same page. I don't suspect the 8250 related changes to affect anything, but I also would like to be testing the same thing. I'm testing vanilla v6.0 > > and indeed as you expect kernel boots (no infinite loop). However dwc3 > host mode is not working as in your case, device mode works fine (Yocto > configures a set of gadgets for me). What do you do to test host mode working? lsusb? Something else? Asking to make sure I'm doing something equivalent on my end. > > Just to be sure if I could have bisected without 0043a I added back the > 2 0001-Revert* and indeed I run into the infinite loop with the console > spitting out continuous: > debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' already present! > tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 > > so yes it seems either 0043b or your patch "usb: dwc3: Don't switch OTG > -> peripheral if extcon is present" is needed to boot (break the > infinite loop). But your patch is in my case not sufficient to make host > mode work. > Next step would be to establish if USB is working before my patch. You should be able to avoid the boot loop if you disable the "phy-tusb1210" driver. The driver fails to probe anyway, so it's not very likely to be crucial for functioning, so it should allow you to try things with my patch reverted: git revert 8bd6b8c4b100 0f0101719138 After that, if things start working, it'd make sense to re-do your function re-arranging experiment to re-validate it. > As I understand it depends a bit on the timing, I might have a different > initrd (built by Yocto vs. Buildroot). F.i. I see I have > extcon-intel-mrfld in initrd and dwc3 / phy-tusb1210 built-in. > You mentioned that your rootfs image does some gadget configuration for you. Can this be disabled? If yes, it'd make sense to check if this could be a variable explaining the difference. What U-Boot version are you running? AFACT U-Boot will touch that particular IP block, so this might be somewhat relevant.
Hi On 11-10-2022 22:50, Andrey Smirnov wrote: > On Tue, Oct 11, 2022 at 11:54 AM Ferry Toth <fntoth@gmail.com> wrote: >> Hi, >> >> Op 10-10-2022 om 23:35 schreef Andrey Smirnov: >>> On Mon, Oct 10, 2022 at 1:52 PM Ferry Toth <fntoth@gmail.com> wrote: >>>> Hi >>>> >>>> Op 10-10-2022 om 13:04 schreef Ferry Toth: >>>>> Hi >>>>> >>>>> On 10-10-2022 07:02, Andrey Smirnov wrote: >>>>>> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: >>>>>>> On 07-10-2022 04:11, Thinh Nguyen wrote: >>>>>>>> On Thu, Oct 06, 2022, Ferry Toth wrote: >>>>>>>>> Hi >>>>>>>>> >>>>>>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: >>>>>>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: >>>>>>>>>>> Hi, >>>>>>>>>>> >>>>>>>>>>> Thanks! >>>>>>>>>>> >>>>>>>>>>> Does the failure only happen the first time host is >>>>>>>>>>> initialized? Or can >>>>>>>>>>> it recover after switching to device then back to host mode? >>>>>>>>>>> >>>>>>>>>>> I can switch back and forth and device mode works each time, >>>>>>>>>>> host mode remains >>>>>>>>>>> dead. >>>>>>>>>> Ok. >>>>>>>>>> >>>>>>>>>>> Probably the failure happens if some step(s) in >>>>>>>>>>> dwc3_core_init() hasn't >>>>>>>>>>> completed. >>>>>>>>>>> >>>>>>>>>>> tusb1210 is a phy driver right? The issue is probably >>>>>>>>>>> because we didn't >>>>>>>>>>> initialize the phy yet. So, I suspect placing >>>>>>>>>>> dwc3_get_extcon() after >>>>>>>>>>> initializing the phy will probably solve the dependency >>>>>>>>>>> problem. >>>>>>>>>>> >>>>>>>>>>> You can try something for yourself or I can provide >>>>>>>>>>> something to test >>>>>>>>>>> later if you don't mind (maybe next week if it's ok). >>>>>>>>>>> >>>>>>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() >>>>>>>>>>> until after >>>>>>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU >>>>>>>>>>> initially >>>>>>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() >>>>>>>>>>> but only for >>>>>>>>>>> case USB_DR_MODE_OTG. So with this change order of events is >>>>>>>>>>> more or less >>>>>>>>>>> unchanged" solves the issue. >>>>>>>>>>> >>>>>>>>>> I saw the experiment you did from the link you provided. We want >>>>>>>>>> to also >>>>>>>>>> confirm exactly which step in dwc3_core_init() was needed. >>>>>>>>> Ok. I first tried the code move suggested by Andrey (didn't work). >>>>>>>>> Then >>>>>>>>> after reading the actual code I moved a bit further. >>>>>>>>> >>>>>>>>> This move was on top of -rc6 without any reverts. I did not make >>>>>>>>> additional >>>>>>>>> changes to dwc3_core_init() >>>>>>>>> >>>>>>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>>>>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>>>>>>> >>>>>>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - >>>>>>>>> dwc3_core_init - .. >>>>>>>>> - dwc3_core_init_mode (no change) >>>>>>>>> >>>>>>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - >>>>>>>>> dwc3_get_extcon - >>>>>>>>> dwc3_core_init_mode (works) >>>>>>>>> >>>>>>>>> .. are what I believe for this issue irrelevant calls to >>>>>>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. >>>>>>>>> >>>>>>>> Right. Thanks for narrowing it down. There are still many steps in >>>>>>>> dwc3_core_init(). We have some suspicion, but we still haven't >>>>>>>> confirmed >>>>>>>> the exact cause of the failure. We can write a proper patch once we >>>>>>>> know >>>>>>>> the reason. >>>>>>> If you would like me to test your suspicion, just tell me what to do >>>>>>> :-) >>>>>> OK, Ferry, I think I'm going to need clarification on specifics on >>>>>> your test setup. Can you share your kernel config, maybe your >>>>>> "/proc/config.gz", somewhere? When you say you are running vanilla >>>>>> Linux, do you mean it or do you mean vanilla tree + some patch delta? >>>>> For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: >>>>> >>>>> https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb >>>>> >>>>> >>>>> There are 2 patches referred in #67 and #68. One is related to the >>>>> infinite loop. The other is I believe also needed to get dwc3 to work. >>>>> >>>>> All the kernel config are applied as .cfg. >>>>> >>>>> Patches and cfs's here: >>>>> >>>>> https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files >>>>> >>>> Updated Yocto recipe for v6.0 here: >>>> >>>> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb >>>> >>>> #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to >>>> this thread). >>> Please drop all of this >>> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb#L69-L77 >>> and re do the testing. Assuming things are still broken, that's how >>> you want to do the bisecting. >> I removed 4 patches: >> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch >> 0044-REVERTME-usb-dwc3-gadget-skip-endpoints-ep-18-in-out.patch >> 0001-Revert-USB-fixup-for-merge-issue-with-usb-dwc3-Don-t.patch >> 0001-Revert-usb-dwc3-Don-t-switch-OTG-peripheral-if-extco.patch > Please remove all custom patches so we are on the same page. I don't > suspect the 8250 related changes to affect anything, but I also would > like to be testing the same thing. I'm testing vanilla v6.0 Alright, but don't expect any change. The 8250 patches are related to using DMA for the serial ports (except the console). It may affect bluetooth, the serial port on the arduino connector, but not the console. >> and indeed as you expect kernel boots (no infinite loop). However dwc3 >> host mode is not working as in your case, device mode works fine (Yocto >> configures a set of gadgets for me). > What do you do to test host mode working? lsusb? Something else? > Asking to make sure I'm doing something equivalent on my end. > I have a smsc95xx 4p usb hub with 1 eth port continuously plugged. It has leds on all ports so when it works it lights up like a Christmas tree. But I also tried plugging a usb stick. It maybe that lsusb is not enough. Iirc the root hub is there, but the tusb1210 not and then device plugs are not detected. So in my case none of the leds on the hub turn on. >> Just to be sure if I could have bisected without 0043a I added back the >> 2 0001-Revert* and indeed I run into the infinite loop with the console >> spitting out continuous: >> debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' already present! >> tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 >> >> so yes it seems either 0043b or your patch "usb: dwc3: Don't switch OTG >> -> peripheral if extcon is present" is needed to boot (break the >> infinite loop). But your patch is in my case not sufficient to make host >> mode work. >> > Next step would be to establish if USB is working before my patch. You > should be able to avoid the boot loop if you disable the > "phy-tusb1210" driver. The driver fails to probe anyway, so it's not > very likely to be crucial for functioning, so it should allow you to > try things with my patch reverted: You lost me here. With "boot loop" you mean "probe loop" right? Why do you think the tusb1210 driver is not crucial? See "phy: ti: tusb1210: Don't check for write errors when powering on" It should not be failing to probe (and with Andy's "Break-infinite-loop" patch is doesn't) as without the tusb1210 usb host mode won't work as device plugs are not detected. Earlier in this thread we had: "The effect of the patch is that on Merrifield (I tested with Intel Edison Arduino board which has a HW switch to select between host and device mode) device mode works but in host mode USB is completely not working. Currently on host mode - when working - superfluous error messages from tusb1210 appear. When host mode is not working there are no tusb1210 messages in the logs / on the console at all. Seemingly tusb1210 is not probed, which points in the direction of a relation to extcon." > git revert 8bd6b8c4b100 0f0101719138 > > After that, if things start working, it'd make sense to re-do your > function re-arranging experiment to re-validate it. > >> As I understand it depends a bit on the timing, I might have a different >> initrd (built by Yocto vs. Buildroot). F.i. I see I have >> extcon-intel-mrfld in initrd and dwc3 / phy-tusb1210 built-in. >> > You mentioned that your rootfs image does some gadget configuration > for you. Can this be disabled? If yes, it'd make sense to check if > this could be a variable explaining the difference. This is done through configfs only when the switch is set to device mode. > What U-Boot version are you running? AFACT U-Boot will touch that > particular IP block, so this might be somewhat relevant. IIRC if have v2022.04 but tested v2021.10 earlier (no difference).
On Tue, Oct 11, 2022 at 01:17:13PM -0700, Andrey Smirnov wrote: > On Tue, Oct 11, 2022 at 2:21 AM Andy Shevchenko > <andriy.shevchenko@linux.intel.com> wrote: > > On Mon, Oct 10, 2022 at 02:40:30PM -0700, Andrey Smirnov wrote: > > > On Mon, Oct 10, 2022 at 12:13 AM Andy Shevchenko > > > <andriy.shevchenko@linux.intel.com> wrote: > > > > On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: > > > > > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: ... > > > > > OK, Ferry, I think I'm going to need clarification on specifics on > > > > > your test setup. Can you share your kernel config, maybe your > > > > > "/proc/config.gz", somewhere? When you say you are running vanilla > > > > > Linux, do you mean it or do you mean vanilla tree + some patch delta? > > > > > > > > > > The reason I'm asking is because I'm having a hard time reproducing > > > > > the problem on my end. In fact, when I build v6.0 > > > > > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > > > > > > > > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > > > > > > > > > I get an infinite loop of reprobing that looks something like (some > > > > > debug tracing, function name + line number, included): > > > > > > > > Yes, this is (one of) known drawback(s) of deferred probe hack. I think > > > > the kernel that Ferry runs has a patch that basically reverts one from > > > > 2014 [1] and allows to have extcon as a module. (1) > > > > > > > > [1]: 58b116bce136 ("drivercore: deferral race condition fix") > > > > > > > > > which renders the system completely unusable, but USB host is > > > > > definitely going to be broken too. Now, ironically, with my patch > > > > > in-place, an attempt to probe extcon that ends up deferring the probe > > > > > happens before the ULPI driver failure (which wasn't failing driver > > > > > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > > > > > there no "driver binding" event that re-triggers deferred probe > > > > > causing the loop, so the system progresses to a point where extcon is > > > > > available and dwc3 driver eventually loads. > > > > > > > > > > After that, and I don't know if I'm doing the same test, USB host > > > > > seems to work as expected. lsusb works, my USB stick enumerates as > > > > > expected. Switching the USB mux to micro-USB and back shuts the host > > > > > functionality down and brings it up as expected. Now I didn't try to > > > > > load any gadgets to make sure USB gadget works 100%, but since you > > > > > were saying it was USB host that was broken, I wasn't concerned with > > > > > that. Am I doing the right test? > > > > > > > > Hmm... What you described above sounds more like a yet another attempt to > > > > workaround (1). _If_ this is the case, we probably can discuss how to fix > > > > it in generic way (somewhere in dd.c, rather than in the certain driver). > > > > > > No, I'm not describing an attempt to fix anything. Just how vanilla > > > v6.0 (where my patch is not reverted) works and where my patch, fixing > > > a logical problem in which extcon was requested too late causing a > > > forced OTG -> "gadget only" switch, also changed the ordering enough > > > to accidentally avoid the loop. > > > > You still refer to a fix, but my question was if it's the same problem or not. > > > > No, it's not the same problem. > > > > > That said, the real test case should be performed on top of clean kernel > > > > before judging if it's good or bad. > > > > > > Given your level of involvemnt with this particular platform and you > > > being the author of > > > https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > > > I assumed/expected you to double check this before sending this revert > > > out. Please do so next time. > > > > As I said I have not yet restored my testing environment for that platform and > > I relied on the Ferry's report. Taking into account the history of breakages > > that done for Intel Merrifield, in particular by not wide tested patches > > against DWC3 driver, I immediately react with a revert. > > That's what I'm asking you not to do next time. If you don't have time > to restore your testing env or double check Ferry's work, please live > with a revert in your local tree until you do. I trust Ferry's tests as mine and repeating again, we have a bad history when people so value their time that breaks our platform, so please test your changes in the future that it makes no regressions. If you want to have a proof that your patches are broken, then I will prioritize this. We now have a full release cycle time for that. > My time is as valuable > as yours and this revert required much more investigation before it > was submitted. You lived with > https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > since 5.10, which apparently was needed to either boot or have dwc3, > so I don't think there is any real urgency. It is in my tree only for the purpose of "don't forget that issue". I think you can work around it by built-in extcon driver.
Hi Op 12-10-2022 om 11:30 schreef Ferry Toth: > Hi > > On 11-10-2022 22:50, Andrey Smirnov wrote: >> On Tue, Oct 11, 2022 at 11:54 AM Ferry Toth <fntoth@gmail.com> wrote: >>> Hi, >>> >>> Op 10-10-2022 om 23:35 schreef Andrey Smirnov: >>>> On Mon, Oct 10, 2022 at 1:52 PM Ferry Toth <fntoth@gmail.com> wrote: >>>>> Hi >>>>> >>>>> Op 10-10-2022 om 13:04 schreef Ferry Toth: >>>>>> Hi >>>>>> >>>>>> On 10-10-2022 07:02, Andrey Smirnov wrote: >>>>>>> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: >>>>>>>> On 07-10-2022 04:11, Thinh Nguyen wrote: >>>>>>>>> On Thu, Oct 06, 2022, Ferry Toth wrote: >>>>>>>>>> Hi >>>>>>>>>> >>>>>>>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: >>>>>>>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: >>>>>>>>>>>> Hi, >>>>>>>>>>>> >>>>>>>>>>>> Thanks! >>>>>>>>>>>> >>>>>>>>>>>> Does the failure only happen the first time host is >>>>>>>>>>>> initialized? Or can >>>>>>>>>>>> it recover after switching to device then back to >>>>>>>>>>>> host mode? >>>>>>>>>>>> >>>>>>>>>>>> I can switch back and forth and device mode works each time, >>>>>>>>>>>> host mode remains >>>>>>>>>>>> dead. >>>>>>>>>>> Ok. >>>>>>>>>>> >>>>>>>>>>>> Probably the failure happens if some step(s) in >>>>>>>>>>>> dwc3_core_init() hasn't >>>>>>>>>>>> completed. >>>>>>>>>>>> >>>>>>>>>>>> tusb1210 is a phy driver right? The issue is probably >>>>>>>>>>>> because we didn't >>>>>>>>>>>> initialize the phy yet. So, I suspect placing >>>>>>>>>>>> dwc3_get_extcon() after >>>>>>>>>>>> initializing the phy will probably solve the >>>>>>>>>>>> dependency >>>>>>>>>>>> problem. >>>>>>>>>>>> >>>>>>>>>>>> You can try something for yourself or I can provide >>>>>>>>>>>> something to test >>>>>>>>>>>> later if you don't mind (maybe next week if it's ok). >>>>>>>>>>>> >>>>>>>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() >>>>>>>>>>>> until after >>>>>>>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU >>>>>>>>>>>> initially >>>>>>>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() >>>>>>>>>>>> but only for >>>>>>>>>>>> case USB_DR_MODE_OTG. So with this change order of events is >>>>>>>>>>>> more or less >>>>>>>>>>>> unchanged" solves the issue. >>>>>>>>>>>> >>>>>>>>>>> I saw the experiment you did from the link you provided. We want >>>>>>>>>>> to also >>>>>>>>>>> confirm exactly which step in dwc3_core_init() was needed. >>>>>>>>>> Ok. I first tried the code move suggested by Andrey (didn't >>>>>>>>>> work). >>>>>>>>>> Then >>>>>>>>>> after reading the actual code I moved a bit further. >>>>>>>>>> >>>>>>>>>> This move was on top of -rc6 without any reverts. I did not make >>>>>>>>>> additional >>>>>>>>>> changes to dwc3_core_init() >>>>>>>>>> >>>>>>>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>>>>>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>>>>>>>> >>>>>>>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - >>>>>>>>>> dwc3_core_init - .. >>>>>>>>>> - dwc3_core_init_mode (no change) >>>>>>>>>> >>>>>>>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - >>>>>>>>>> dwc3_get_extcon - >>>>>>>>>> dwc3_core_init_mode (works) >>>>>>>>>> >>>>>>>>>> .. are what I believe for this issue irrelevant calls to >>>>>>>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and >>>>>>>>>> dwc3_debugfs_init. >>>>>>>>>> >>>>>>>>> Right. Thanks for narrowing it down. There are still many steps in >>>>>>>>> dwc3_core_init(). We have some suspicion, but we still haven't >>>>>>>>> confirmed >>>>>>>>> the exact cause of the failure. We can write a proper patch >>>>>>>>> once we >>>>>>>>> know >>>>>>>>> the reason. >>>>>>>> If you would like me to test your suspicion, just tell me what >>>>>>>> to do >>>>>>>> :-) >>>>>>> OK, Ferry, I think I'm going to need clarification on specifics on >>>>>>> your test setup. Can you share your kernel config, maybe your >>>>>>> "/proc/config.gz", somewhere? When you say you are running vanilla >>>>>>> Linux, do you mean it or do you mean vanilla tree + some patch >>>>>>> delta? >>>>>> For v6.0 I can get the exacts tonight. But earlier I had this for >>>>>> v5.17: >>>>>> >>>>>> https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb >>>>>> >>>>>> >>>>>> There are 2 patches referred in #67 and #68. One is related to the >>>>>> infinite loop. The other is I believe also needed to get dwc3 to >>>>>> work. >>>>>> >>>>>> All the kernel config are applied as .cfg. >>>>>> >>>>>> Patches and cfs's here: >>>>>> >>>>>> https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files >>>>>> >>>>> Updated Yocto recipe for v6.0 here: >>>>> >>>>> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb >>>>> >>>>> #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to >>>>> this thread). >>>> Please drop all of this >>>> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb#L69-L77 >>>> and re do the testing. Assuming things are still broken, that's how >>>> you want to do the bisecting. >>> I removed 4 patches: >>> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch >>> 0044-REVERTME-usb-dwc3-gadget-skip-endpoints-ep-18-in-out.patch >>> 0001-Revert-USB-fixup-for-merge-issue-with-usb-dwc3-Don-t.patch >>> 0001-Revert-usb-dwc3-Don-t-switch-OTG-peripheral-if-extco.patch >> Please remove all custom patches so we are on the same page. I don't >> suspect the 8250 related changes to affect anything, but I also would >> like to be testing the same thing. I'm testing vanilla v6.0 > Alright, but don't expect any change. The 8250 patches are related to > using DMA for the serial ports (except the console). It may affect > bluetooth, the serial port on the arduino connector, but not the console. >>> and indeed as you expect kernel boots (no infinite loop). However dwc3 >>> host mode is not working as in your case, device mode works fine (Yocto >>> configures a set of gadgets for me). With vanilla v6.0 there is no probe loop but still host mode does not work. >> What do you do to test host mode working? lsusb? Something else? >> Asking to make sure I'm doing something equivalent on my end. root@yuna:~# lsusb Bus 002 Device 001: ID 1d6b:0003 Linux Foundation 3.0 root hub Bus 001 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub This is with smsc95xx plugged in - no leds on except power/ > I have a smsc95xx 4p usb hub with 1 eth port continuously plugged. It > has leds on all ports so when it works it lights up like a Christmas tree. > > But I also tried plugging a usb stick. > > It maybe that lsusb is not enough. Iirc the root hub is there, but the > tusb1210 not and then device plugs are not detected. So in my case none > of the leds on the hub turn on. root@yuna:~# dmesg | grep -i tusb root@yuna:~# >>> Just to be sure if I could have bisected without 0043a I added back the >>> 2 0001-Revert* and indeed I run into the infinite loop with the console >>> spitting out continuous: >>> debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' already present! >>> tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 >>> >>> so yes it seems either 0043b or your patch "usb: dwc3: Don't switch OTG >>> -> peripheral if extcon is present" is needed to boot (break the >>> infinite loop). But your patch is in my case not sufficient to make host >>> mode work. >>> >> Next step would be to establish if USB is working before my patch. You >> should be able to avoid the boot loop if you disable the >> "phy-tusb1210" driver. The driver fails to probe anyway, so it's not >> very likely to be crucial for functioning, so it should allow you to >> try things with my patch reverted: > > You lost me here. With "boot loop" you mean "probe loop" right? Why do > you think the tusb1210 driver is not crucial? Nevertheless tried: with tusb1210 disabled and your patch reverted #SRC_URI:append = " file://0038-enable-PHY_TUSB1210.cfg" SRC_URI:append = " file://0001-Revert-USB-fixup-for-merge-issue-with-usb-dwc3-Don-t.patch" SRC_URI:append = " file://0001-Revert-usb-dwc3-Don-t-switch-OTG-peripheral-if-extco.patch" there is indeed no probe loop as you expect, but host mode still does not work (device mode still works). We need the tusb1210 in host mode. Earlier you asked for my config, here it is: https://drive.google.com/file/d/1aKJWMqiAXnReeLCvxshzjKwGxIWQ7eJk/view?usp=sharing > See "phy: ti: tusb1210: Don't check for write errors when powering on" > > It should not be failing to probe (and with Andy's "Break-infinite-loop" > patch is doesn't) as without the tusb1210 usb host mode won't work as > device plugs are not detected. > > Earlier in this thread we had: > > "The effect of the patch is that on Merrifield (I tested with Intel > Edison Arduino board which has a HW switch to select between host and > device mode) device mode works but in host mode USB is completely not > working. > > Currently on host mode - when working - superfluous error messages from > tusb1210 appear. When host mode is not working there are no tusb1210 > messages in the logs / on the console at all. Seemingly tusb1210 is not > probed, which points in the direction of a relation to extcon." > >> git revert 8bd6b8c4b100 0f0101719138 >> >> After that, if things start working, it'd make sense to re-do your >> function re-arranging experiment to re-validate it. >> >>> As I understand it depends a bit on the timing, I might have a different >>> initrd (built by Yocto vs. Buildroot). F.i. I see I have >>> extcon-intel-mrfld in initrd and dwc3 / phy-tusb1210 built-in. >>> >> You mentioned that your rootfs image does some gadget configuration >> for you. Can this be disabled? If yes, it'd make sense to check if >> this could be a variable explaining the difference. I notice when flipping switch to device mode, gadgets pop up. Then switching back to host, console (and dmesg) show: root@yuna:~# dwc3 dwc3.0.auto: request 000000004e7f118e was not queued to ep5in dwc3 dwc3.0.auto: request 000000003c6215ba was not queued to ep4out dwc3 dwc3.0.auto: request 000000005270315b was not queued to ep4out dwc3 dwc3.0.auto: request 000000001d456f53 was not queued to ep6in dwc3 dwc3.0.auto: request 000000001f17ddc6 was not queued to ep6in This is new and caused by dropping "REVERTME: usb: dwc3: gadget: skip endpoints ep[18]{in,out}". I think we need to keep this one. > This is done through configfs only when the switch is set to device mode. >> What U-Boot version are you running? AFACT U-Boot will touch that >> particular IP block, so this might be somewhat relevant. > IIRC if have v2022.04 but tested v2021.10 earlier (no difference). I am indeed on v2022.04 with 1 patch on top "REVERTME: usb: dwc3: gadget: skip endpoints ep[18]{in,out}"
On Wed, Oct 12, 2022 at 2:30 AM Ferry Toth <fntoth@gmail.com> wrote: > > Hi > > On 11-10-2022 22:50, Andrey Smirnov wrote: > > On Tue, Oct 11, 2022 at 11:54 AM Ferry Toth <fntoth@gmail.com> wrote: > >> Hi, > >> > >> Op 10-10-2022 om 23:35 schreef Andrey Smirnov: > >>> On Mon, Oct 10, 2022 at 1:52 PM Ferry Toth <fntoth@gmail.com> wrote: > >>>> Hi > >>>> > >>>> Op 10-10-2022 om 13:04 schreef Ferry Toth: > >>>>> Hi > >>>>> > >>>>> On 10-10-2022 07:02, Andrey Smirnov wrote: > >>>>>> On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > >>>>>>> On 07-10-2022 04:11, Thinh Nguyen wrote: > >>>>>>>> On Thu, Oct 06, 2022, Ferry Toth wrote: > >>>>>>>>> Hi > >>>>>>>>> > >>>>>>>>> On 06-10-2022 04:12, Thinh Nguyen wrote: > >>>>>>>>>> On Wed, Oct 05, 2022, Ferry Toth wrote: > >>>>>>>>>>> Hi, > >>>>>>>>>>> > >>>>>>>>>>> Thanks! > >>>>>>>>>>> > >>>>>>>>>>> Does the failure only happen the first time host is > >>>>>>>>>>> initialized? Or can > >>>>>>>>>>> it recover after switching to device then back to host mode? > >>>>>>>>>>> > >>>>>>>>>>> I can switch back and forth and device mode works each time, > >>>>>>>>>>> host mode remains > >>>>>>>>>>> dead. > >>>>>>>>>> Ok. > >>>>>>>>>> > >>>>>>>>>>> Probably the failure happens if some step(s) in > >>>>>>>>>>> dwc3_core_init() hasn't > >>>>>>>>>>> completed. > >>>>>>>>>>> > >>>>>>>>>>> tusb1210 is a phy driver right? The issue is probably > >>>>>>>>>>> because we didn't > >>>>>>>>>>> initialize the phy yet. So, I suspect placing > >>>>>>>>>>> dwc3_get_extcon() after > >>>>>>>>>>> initializing the phy will probably solve the dependency > >>>>>>>>>>> problem. > >>>>>>>>>>> > >>>>>>>>>>> You can try something for yourself or I can provide > >>>>>>>>>>> something to test > >>>>>>>>>>> later if you don't mind (maybe next week if it's ok). > >>>>>>>>>>> > >>>>>>>>>>> Yes, the code move I mentioned above "moves dwc3_get_extcon() > >>>>>>>>>>> until after > >>>>>>>>>>> dwc3_core_init() but just before dwc3_core_init_mode(). AFAIU > >>>>>>>>>>> initially > >>>>>>>>>>> dwc3_get_extcon() was called from within dwc3_core_init_mode() > >>>>>>>>>>> but only for > >>>>>>>>>>> case USB_DR_MODE_OTG. So with this change order of events is > >>>>>>>>>>> more or less > >>>>>>>>>>> unchanged" solves the issue. > >>>>>>>>>>> > >>>>>>>>>> I saw the experiment you did from the link you provided. We want > >>>>>>>>>> to also > >>>>>>>>>> confirm exactly which step in dwc3_core_init() was needed. > >>>>>>>>> Ok. I first tried the code move suggested by Andrey (didn't work). > >>>>>>>>> Then > >>>>>>>>> after reading the actual code I moved a bit further. > >>>>>>>>> > >>>>>>>>> This move was on top of -rc6 without any reverts. I did not make > >>>>>>>>> additional > >>>>>>>>> changes to dwc3_core_init() > >>>>>>>>> > >>>>>>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > >>>>>>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) > >>>>>>>>> > >>>>>>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - > >>>>>>>>> dwc3_core_init - .. > >>>>>>>>> - dwc3_core_init_mode (no change) > >>>>>>>>> > >>>>>>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - > >>>>>>>>> dwc3_get_extcon - > >>>>>>>>> dwc3_core_init_mode (works) > >>>>>>>>> > >>>>>>>>> .. are what I believe for this issue irrelevant calls to > >>>>>>>>> dwc3_alloc_scratch_buffers, dwc3_check_params and dwc3_debugfs_init. > >>>>>>>>> > >>>>>>>> Right. Thanks for narrowing it down. There are still many steps in > >>>>>>>> dwc3_core_init(). We have some suspicion, but we still haven't > >>>>>>>> confirmed > >>>>>>>> the exact cause of the failure. We can write a proper patch once we > >>>>>>>> know > >>>>>>>> the reason. > >>>>>>> If you would like me to test your suspicion, just tell me what to do > >>>>>>> :-) > >>>>>> OK, Ferry, I think I'm going to need clarification on specifics on > >>>>>> your test setup. Can you share your kernel config, maybe your > >>>>>> "/proc/config.gz", somewhere? When you say you are running vanilla > >>>>>> Linux, do you mean it or do you mean vanilla tree + some patch delta? > >>>>> For v6.0 I can get the exacts tonight. But earlier I had this for v5.17: > >>>>> > >>>>> https://github.com/htot/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_5.17.bb > >>>>> > >>>>> > >>>>> There are 2 patches referred in #67 and #68. One is related to the > >>>>> infinite loop. The other is I believe also needed to get dwc3 to work. > >>>>> > >>>>> All the kernel config are applied as .cfg. > >>>>> > >>>>> Patches and cfs's here: > >>>>> > >>>>> https://github.com/htot/meta-intel-edison/tree/master/meta-intel-edison-bsp/recipes-kernel/linux/files > >>>>> > >>>> Updated Yocto recipe for v6.0 here: > >>>> > >>>> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb > >>>> > >>>> #75-#77 are the 2 reverts from Andy, + one SOF revert (not related to > >>>> this thread). > >>> Please drop all of this > >>> https://github.com/htot/meta-intel-edison/blob/honister/meta-intel-edison-bsp/recipes-kernel/linux/linux-yocto_6.0.bb#L69-L77 > >>> and re do the testing. Assuming things are still broken, that's how > >>> you want to do the bisecting. > >> I removed 4 patches: > >> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > >> 0044-REVERTME-usb-dwc3-gadget-skip-endpoints-ep-18-in-out.patch > >> 0001-Revert-USB-fixup-for-merge-issue-with-usb-dwc3-Don-t.patch > >> 0001-Revert-usb-dwc3-Don-t-switch-OTG-peripheral-if-extco.patch > > Please remove all custom patches so we are on the same page. I don't > > suspect the 8250 related changes to affect anything, but I also would > > like to be testing the same thing. I'm testing vanilla v6.0 > Alright, but don't expect any change. The 8250 patches are related to > using DMA for the serial ports (except the console). It may affect > bluetooth, the serial port on the arduino connector, but not the console. Yeah, I don't, but for the sake of thoroughness we may as well be building the same thing. > >> and indeed as you expect kernel boots (no infinite loop). However dwc3 > >> host mode is not working as in your case, device mode works fine (Yocto > >> configures a set of gadgets for me). > > What do you do to test host mode working? lsusb? Something else? > > Asking to make sure I'm doing something equivalent on my end. > > > I have a smsc95xx 4p usb hub with 1 eth port continuously plugged. It > has leds on all ports so when it works it lights up like a Christmas tree. > > But I also tried plugging a usb stick. > > It maybe that lsusb is not enough. Iirc the root hub is there, but the > tusb1210 not and then device plugs are not detected. So in my case none > of the leds on the hub turn on. > Yeah, by lsusb I mean, "plug in a device" and make sure it enumerated and visible in "lsusb". > >> Just to be sure if I could have bisected without 0043a I added back the > >> 2 0001-Revert* and indeed I run into the infinite loop with the console > >> spitting out continuous: > >> debugfs: Directory 'dwc3.0.auto' with parent 'ulpi' already present! > >> tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 > >> > >> so yes it seems either 0043b or your patch "usb: dwc3: Don't switch OTG > >> -> peripheral if extcon is present" is needed to boot (break the > >> infinite loop). But your patch is in my case not sufficient to make host > >> mode work. > >> > > Next step would be to establish if USB is working before my patch. You > > should be able to avoid the boot loop if you disable the > > "phy-tusb1210" driver. The driver fails to probe anyway, so it's not > > very likely to be crucial for functioning, so it should allow you to > > try things with my patch reverted: > > You lost me here. With "boot loop" you mean "probe loop" right? Yep > Why do > you think the tusb1210 driver is not crucial? > > See "phy: ti: tusb1210: Don't check for write errors when powering on" > My end goal here is to find a way to test vanilla v6.0 with the two patches reverted on your end. I thought that during my testing I saw tusb1210 print those timeout messages during its probe and that disabling the driver worked to break the loop, but I went back to double check and it doesn't work so scratch that idea. Configuring extcon as a built-in breaks host functionality with or without patches on my end, so I'm not sure it could be a path. I won't have time to try things with 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch until the weekend, meanwhile can you give this diff a try with vanilla (no reverts) v6.0: modified drivers/phy/ti/phy-tusb1210.c @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, enum phy_mode mode, int submode) u8 reg; ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); + WARN_ON(ret < 0); if (ret < 0) return ret; @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, enum phy_mode mode, int submode) } tusb->otg_ctrl = reg; - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); + WARN_ON(ret < 0); + return ret; + } #ifdef CONFIG_POWER_SUPPLY ? I'm curious to see if there's masked errors on your end since dwc3 driver doesn't check for those. > It should not be failing to probe (and with Andy's "Break-infinite-loop" > patch is doesn't) as without the tusb1210 usb host mode won't work as > device plugs are not detected. > > Earlier in this thread we had: > > "The effect of the patch is that on Merrifield (I tested with Intel > Edison Arduino board which has a HW switch to select between host and > device mode) device mode works but in host mode USB is completely not > working. > > Currently on host mode - when working - superfluous error messages from > tusb1210 appear. When host mode is not working there are no tusb1210 > messages in the logs / on the console at all. Seemingly tusb1210 is not > probed, which points in the direction of a relation to extcon." > > > git revert 8bd6b8c4b100 0f0101719138 > > > > After that, if things start working, it'd make sense to re-do your > > function re-arranging experiment to re-validate it. > > > >> As I understand it depends a bit on the timing, I might have a different > >> initrd (built by Yocto vs. Buildroot). F.i. I see I have > >> extcon-intel-mrfld in initrd and dwc3 / phy-tusb1210 built-in. > >> > > You mentioned that your rootfs image does some gadget configuration > > for you. Can this be disabled? If yes, it'd make sense to check if > > this could be a variable explaining the difference. > This is done through configfs only when the switch is set to device mode. Sure, but can it be disabled? We are looking for unknown variables, so excluding this would be a reasonable thing to do. > > What U-Boot version are you running? AFACT U-Boot will touch that > > particular IP block, so this might be somewhat relevant. > IIRC if have v2022.04 but tested v2021.10 earlier (no difference). OK, sounds like we can tick that off the list.
On Wed, Oct 12, 2022 at 3:32 AM Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > On Tue, Oct 11, 2022 at 01:17:13PM -0700, Andrey Smirnov wrote: > > On Tue, Oct 11, 2022 at 2:21 AM Andy Shevchenko > > <andriy.shevchenko@linux.intel.com> wrote: > > > On Mon, Oct 10, 2022 at 02:40:30PM -0700, Andrey Smirnov wrote: > > > > On Mon, Oct 10, 2022 at 12:13 AM Andy Shevchenko > > > > <andriy.shevchenko@linux.intel.com> wrote: > > > > > On Sun, Oct 09, 2022 at 10:02:26PM -0700, Andrey Smirnov wrote: > > > > > > On Fri, Oct 7, 2022 at 6:07 AM Ferry Toth <fntoth@gmail.com> wrote: > > ... > > > > > > > OK, Ferry, I think I'm going to need clarification on specifics on > > > > > > your test setup. Can you share your kernel config, maybe your > > > > > > "/proc/config.gz", somewhere? When you say you are running vanilla > > > > > > Linux, do you mean it or do you mean vanilla tree + some patch delta? > > > > > > > > > > > > The reason I'm asking is because I'm having a hard time reproducing > > > > > > the problem on my end. In fact, when I build v6.0 > > > > > > (4fe89d07dcc2804c8b562f6c7896a45643d34b2f) and then do a > > > > > > > > > > > > git revert 8bd6b8c4b100 0f0101719138 (original revert proposed by Andy) > > > > > > > > > > > > I get an infinite loop of reprobing that looks something like (some > > > > > > debug tracing, function name + line number, included): > > > > > > > > > > Yes, this is (one of) known drawback(s) of deferred probe hack. I think > > > > > the kernel that Ferry runs has a patch that basically reverts one from > > > > > 2014 [1] and allows to have extcon as a module. (1) > > > > > > > > > > [1]: 58b116bce136 ("drivercore: deferral race condition fix") > > > > > > > > > > > which renders the system completely unusable, but USB host is > > > > > > definitely going to be broken too. Now, ironically, with my patch > > > > > > in-place, an attempt to probe extcon that ends up deferring the probe > > > > > > happens before the ULPI driver failure (which wasn't failing driver > > > > > > probe prior to https://lore.kernel.org/all/20220213130524.18748-7-hdegoede@redhat.com/), > > > > > > there no "driver binding" event that re-triggers deferred probe > > > > > > causing the loop, so the system progresses to a point where extcon is > > > > > > available and dwc3 driver eventually loads. > > > > > > > > > > > > After that, and I don't know if I'm doing the same test, USB host > > > > > > seems to work as expected. lsusb works, my USB stick enumerates as > > > > > > expected. Switching the USB mux to micro-USB and back shuts the host > > > > > > functionality down and brings it up as expected. Now I didn't try to > > > > > > load any gadgets to make sure USB gadget works 100%, but since you > > > > > > were saying it was USB host that was broken, I wasn't concerned with > > > > > > that. Am I doing the right test? > > > > > > > > > > Hmm... What you described above sounds more like a yet another attempt to > > > > > workaround (1). _If_ this is the case, we probably can discuss how to fix > > > > > it in generic way (somewhere in dd.c, rather than in the certain driver). > > > > > > > > No, I'm not describing an attempt to fix anything. Just how vanilla > > > > v6.0 (where my patch is not reverted) works and where my patch, fixing > > > > a logical problem in which extcon was requested too late causing a > > > > forced OTG -> "gadget only" switch, also changed the ordering enough > > > > to accidentally avoid the loop. > > > > > > You still refer to a fix, but my question was if it's the same problem or not. > > > > > > > No, it's not the same problem. > > > > > > > That said, the real test case should be performed on top of clean kernel > > > > > before judging if it's good or bad. > > > > > > > > Given your level of involvemnt with this particular platform and you > > > > being the author of > > > > https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > > > > I assumed/expected you to double check this before sending this revert > > > > out. Please do so next time. > > > > > > As I said I have not yet restored my testing environment for that platform and > > > I relied on the Ferry's report. Taking into account the history of breakages > > > that done for Intel Merrifield, in particular by not wide tested patches > > > against DWC3 driver, I immediately react with a revert. > > > > That's what I'm asking you not to do next time. If you don't have time > > to restore your testing env or double check Ferry's work, please live > > with a revert in your local tree until you do. > > I trust Ferry's tests as mine and repeating again, we have a bad history > when people so value their time that breaks our platform, This is not a good excuse to jump the gun and send a revert without double checking. Some regressions will always be unavoidable. > so please test > your changes in the future that it makes no regressions. > This is, in a nutshell, asking me to prove a negative. That's not a feasible request. To add insult to injury, you are talking about a platform way past EOL that's out of stock in every major store and it's by sheer luck that I was able to get the last kit on eBay. Downstream will always be the ultimate test for regressions given the sheer number of permutations. A CI/CD rig that would allow developers to make a regression test run, would make this a much more reasonable request, without it, end-user(s) is the only "test bed" there is. > If you want to have a proof that your patches are broken, then I will > prioritize this. We now have a full release cycle time for that. > You prioritizing this now saves me nothing, whereas you prioritizing this before submitting reverts would've saved me time. That's the point I'm trying to convey. > > My time is as valuable > > as yours and this revert required much more investigation before it > > was submitted. You lived with > > https://github.com/edison-fw/meta-intel-edison/blob/master/meta-intel-edison-bsp/recipes-kernel/linux/files/0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch > > since 5.10, which apparently was needed to either boot or have dwc3, > > so I don't think there is any real urgency. > > It is in my tree only for the purpose of "don't forget that issue". > I think you can work around it by built-in extcon driver. >
<SNIP> > My end goal here is to find a way to test vanilla v6.0 with the two > patches reverted on your end. I thought that during my testing I saw > tusb1210 print those timeout messages during its probe and that > disabling the driver worked to break the loop, but I went back to > double check and it doesn't work so scratch that idea. Configuring > extcon as a built-in breaks host functionality with or without patches > on my end, so I'm not sure it could be a path. > > I won't have time to try things with > 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch until > the weekend, meanwhile can you give this diff a try with vanilla (no > reverts) v6.0: > > modified drivers/phy/ti/phy-tusb1210.c > @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, enum > phy_mode mode, int submode) > u8 reg; > > ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); > + WARN_ON(ret < 0); > if (ret < 0) > return ret; > > @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, > enum phy_mode mode, int submode) > } > > tusb->otg_ctrl = reg; > - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); > + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); > + WARN_ON(ret < 0); > + return ret; > + > } > > #ifdef CONFIG_POWER_SUPPLY > > ? I'm curious to see if there's masked errors on your end since dwc3 > driver doesn't check for those. root@yuna:~# dmesg | grep -i -E 'warn|assert|error|tusb|dwc3' 8250_mid: probe of 0000:00:04.0 failed with error -16 platform regulatory.0: Direct firmware load for regulatory.db failed with error -2 brcmfmac mmc2:0001:1: Direct firmware load for brcm/brcmfmac43340-sdio.Intel Corporation-Merrifield.bin failed with error -2 sof-audio-pci-intel-tng 0000:00:0d.0: error: I/O region is too small. sof-audio-pci-intel-tng 0000:00:0d.0: error: failed to probe DSP -19 >> This is done through configfs only when the switch is set to device mode. > Sure, but can it be disabled? We are looking for unknown variables, so > excluding this would be a reasonable thing to do. It's not enabled until I flip the switch to device mode.
On Thu, Oct 13, 2022 at 12:35 PM Ferry Toth <fntoth@gmail.com> wrote: > > <SNIP> > > My end goal here is to find a way to test vanilla v6.0 with the two > > patches reverted on your end. I thought that during my testing I saw > > tusb1210 print those timeout messages during its probe and that > > disabling the driver worked to break the loop, but I went back to > > double check and it doesn't work so scratch that idea. Configuring > > extcon as a built-in breaks host functionality with or without patches > > on my end, so I'm not sure it could be a path. > > > > I won't have time to try things with > > 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch until > > the weekend, meanwhile can you give this diff a try with vanilla (no > > reverts) v6.0: > > OK, got a chance to try things with that patch. Both v6.0 and v6.0 with my patches reverted work the same, my Kingston DataTraveller USB stick enumerates and works as expected. > > modified drivers/phy/ti/phy-tusb1210.c > > @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, enum > > phy_mode mode, int submode) > > u8 reg; > > > > ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); > > + WARN_ON(ret < 0); > > if (ret < 0) > > return ret; > > > > @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, > > enum phy_mode mode, int submode) > > } > > > > tusb->otg_ctrl = reg; > > - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); > > + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); > > + WARN_ON(ret < 0); > > + return ret; > > + > > } > > > > #ifdef CONFIG_POWER_SUPPLY > > > > ? I'm curious to see if there's masked errors on your end since dwc3 > > driver doesn't check for those. > root@yuna:~# dmesg | grep -i -E 'warn|assert|error|tusb|dwc3' > 8250_mid: probe of 0000:00:04.0 failed with error -16 > platform regulatory.0: Direct firmware load for regulatory.db failed > with error -2 > brcmfmac mmc2:0001:1: Direct firmware load for > brcm/brcmfmac43340-sdio.Intel Corporation-Merrifield.bin failed with > error -2 > sof-audio-pci-intel-tng 0000:00:0d.0: error: I/O region is too small. > sof-audio-pci-intel-tng 0000:00:0d.0: error: failed to probe DSP -19 > > > >> This is done through configfs only when the switch is set to device mode. > > Sure, but can it be disabled? We are looking for unknown variables, so > > excluding this would be a reasonable thing to do. > It's not enabled until I flip the switch to device mode. OK to cut this back and forth short, I think it'd be easier to just ask you to run what I run. Here's vanilla v6.0 bzImage and initrd (built with your config + CONFIG_PHY_TUSB1210=y) I tested with https://drive.google.com/drive/folders/1H28AL1coPPZ2kLTYskDuDdWo-oE7DRPH?usp=sharing let's see how it behaves on your setup. There's also the U-Boot binary I use in that folder in case you want to give it a try. Now on Merrifield dwc3_get_extcon() doesn't do anything but call extcon_get_extcon_dev() which doesn't touch any hardware or interact with other drivers, so assuming > So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > dwc3_core_init - .. - dwc3_core_init_mode (not working) > > I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - > .. - dwc3_core_init_mode (no change) > > Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - > dwc3_core_init_mode (works) still holds(did you double check that with vanilla v6.0?) the only difference that I can see is execution timings. It seems to me it's either an extra delay added by execution of extcon_get_extcon_dev() (unlikely) or multiple partial probes that include dwc3_core_init() that change things. You can try to check the latter by adding an artificial probe deferral point after dwc3_core_init(). Something like (didn't test this): modified drivers/usb/dwc3/core.c @@ -1860,6 +1860,10 @@ static int dwc3_probe(struct platform_device *pdev) goto err3; ret = dwc3_core_init(dwc); + static int deferral_counter = 0; + if (deferral_counter++ < 9) /* I counted 9 deferrals in my testing */ + ret = -EPROBE_DEFER; + if (ret) { dev_err_probe(dev, ret, "failed to initialize core\n"); goto err4;
Op 15-10-2022 om 21:54 schreef Andrey Smirnov: > On Thu, Oct 13, 2022 at 12:35 PM Ferry Toth <fntoth@gmail.com> wrote: >> <SNIP> >>> My end goal here is to find a way to test vanilla v6.0 with the two >>> patches reverted on your end. I thought that during my testing I saw >>> tusb1210 print those timeout messages during its probe and that >>> disabling the driver worked to break the loop, but I went back to >>> double check and it doesn't work so scratch that idea. Configuring >>> extcon as a built-in breaks host functionality with or without patches >>> on my end, so I'm not sure it could be a path. >>> >>> I won't have time to try things with >>> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch until >>> the weekend, meanwhile can you give this diff a try with vanilla (no >>> reverts) v6.0: >>> > OK, got a chance to try things with that patch. Both v6.0 and v6.0 > with my patches reverted work the same, my Kingston DataTraveller USB > stick enumerates and works as expected. > Iow you don't need the patch at all to get usb to work. There has got to be a difference in our configs. Did you have a chance to look at mine (here: https://drive.google.com/file/d/1aKJWMqiAXnReeLCvxshzjKwGxIWQ7eJk/view?usp=sharing) Else, send me yours. >>> modified drivers/phy/ti/phy-tusb1210.c >>> @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, enum >>> phy_mode mode, int submode) >>> u8 reg; >>> >>> ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); >>> + WARN_ON(ret < 0); >>> if (ret < 0) >>> return ret; >>> >>> @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, >>> enum phy_mode mode, int submode) >>> } >>> >>> tusb->otg_ctrl = reg; >>> - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); >>> + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); >>> + WARN_ON(ret < 0); >>> + return ret; >>> + >>> } >>> >>> #ifdef CONFIG_POWER_SUPPLY >>> >>> ? I'm curious to see if there's masked errors on your end since dwc3 >>> driver doesn't check for those. >> root@yuna:~# dmesg | grep -i -E 'warn|assert|error|tusb|dwc3' >> 8250_mid: probe of 0000:00:04.0 failed with error -16 >> platform regulatory.0: Direct firmware load for regulatory.db failed >> with error -2 >> brcmfmac mmc2:0001:1: Direct firmware load for >> brcm/brcmfmac43340-sdio.Intel Corporation-Merrifield.bin failed with >> error -2 >> sof-audio-pci-intel-tng 0000:00:0d.0: error: I/O region is too small. >> sof-audio-pci-intel-tng 0000:00:0d.0: error: failed to probe DSP -19 >> >> >>>> This is done through configfs only when the switch is set to device mode. >>> Sure, but can it be disabled? We are looking for unknown variables, so >>> excluding this would be a reasonable thing to do. >> It's not enabled until I flip the switch to device mode. > OK to cut this back and forth short, I think it'd be easier to just > ask you to run what I run. Here's vanilla v6.0 bzImage and initrd > (built with your config + CONFIG_PHY_TUSB1210=y) I tested with What do you mean by this? My config is with CONFIG_GENERIC_PHY=y CONFIG_PHY_TUSB1210=y > https://drive.google.com/drive/folders/1H28AL1coPPZ2kLTYskDuDdWo-oE7DRPH?usp=sharing > let's see how it behaves on your setup. There's also the U-Boot binary Ok, it's getting weirder and weirder. The following is with my U-Boot and your kernel/initrd 1) I placed them in /boot which is on my btrfs partition on the emmc (my U-Boot has btrfs enabled) Linux kernel version 6.0.0-edison-acpi-standard (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 UTC 2022 Building boot_params at 0x00090000 Loading bzImage at address 100000 (12064480 bytes) Initial RAM disk at linear address 0x06000000, size 25165824 bytes Kernel command line: "quiet root=/dev/mmcblk0p8 rootflags=subvol=@,compress=lzo rootfstype=btrfs console=ttyS2,115200n8 earlyprintk=ttyS2,115200n8,keep loglevel=4 systemd.unit=multi-user.target" Kernel loaded at 00100000, setup_base=00090000 Usb drive is not detected regardless booting with stick plugged or plugging later on. # lsusb Bus 001 Device 001: ID 1d6b:0002 Bus 002 Device 001: ID 1d6b:0003 No TUSB1210 probed # dmesg | grep dwc3 # 2) I placed them in my vfat rescue partition Linux kernel version 6.0.0-edison-acpi-standard (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 UTC 2022 Building boot_params at 0x00090000 Loading bzImage at address 100000 (12064480 bytes) Initial RAM disk at linear address 0x06000000, size 25165824 bytes Kernel command line: "debugshell=0 tty1 console=ttyS2,115200n8 root=/dev/mmcblk0p7 rootfstype=vfat systemd.unit=multi-user.target" Kernel loaded at 00100000, setup_base=00090000 Usb drive is detected. # lsusb Bus 001 Device 001: ID 1d6b:0002 Bus 001 Device 002: ID 125f:312b Bus 002 Device 001: ID 1d6b:0003 TUSB1210 probed # dmesg | grep dwc3 [ 8.605845] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer reset [ 8.605876] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup [ 8.605927] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO lookup [ 8.605941] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer reset found [ 8.605956] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer cs [ 8.605970] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup [ 8.606011] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO lookup [ 8.606024] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer cs found [ 8.669317] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to reg 0x80 ## note: options debugshell, root and rootfstype are normally handled by a script in my initrd, so I guess here noop. > I use in that folder in case you want to give it a try. > > Now on Merrifield dwc3_get_extcon() doesn't do anything but call > extcon_get_extcon_dev() which doesn't touch any hardware or interact > with other drivers, so assuming > >> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >> dwc3_core_init - .. - dwc3_core_init_mode (not working) >> >> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - >> .. - dwc3_core_init_mode (no change) >> >> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - >> dwc3_core_init_mode (works) > still holds(did you double check that with vanilla v6.0?) the only I didn't check > difference that I can see is execution timings. It seems to me it's > either an extra delay added by execution of extcon_get_extcon_dev() > (unlikely) or multiple partial probes that include dwc3_core_init() > that change things. You can try to check the latter by adding an > artificial probe deferral point after dwc3_core_init(). Something like > (didn't test this): > > modified drivers/usb/dwc3/core.c > @@ -1860,6 +1860,10 @@ static int dwc3_probe(struct platform_device *pdev) > goto err3; > > ret = dwc3_core_init(dwc); > + static int deferral_counter = 0; > + if (deferral_counter++ < 9) /* I counted 9 deferrals in my testing */ > + ret = -EPROBE_DEFER; > + > if (ret) { > dev_err_probe(dev, ret, "failed to initialize core\n"); > goto err4; Not sure how you wanted this tested. So I assume on vanilla booting from btrfs on eemc. It crashes but maybe the trace is usefull. After crash it continues but no USB appears at all. [ 4.185151] kobject_add_internal failed for dwc3.0.auto.ulpi with -EEXIST, don't try to register things with the same name in the same directory. [ 4.198625] dwc3 dwc3.0.auto: failed to register ULPI interface [ 4.213112] BUG: kernel NULL pointer dereference, address: 0000000000000001 [ 4.220260] #PF: supervisor read access in kernel mode [ 4.225523] #PF: error_code(0x0000) - not-present page [ 4.230785] PGD 0 P4D 0 [ 4.233402] Oops: 0000 [#1] PREEMPT SMP PTI [ 4.237696] CPU: 0 PID: 8 Comm: kworker/u4:0 Not tainted 6.0.0-edison-acpi-standard #1 [ 4.245802] Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 [ 4.254791] Workqueue: events_unbound deferred_probe_work_func [ 4.260793] RIP: 0010:strlen+0x0/0x20 [ 4.264559] Code: b6 07 38 d0 74 14 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 cc cc cc cc 48 89 f8 c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 <80> 3f 00 74 14 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 cc [ 4.283751] RSP: 0018:ffffa9520004bb10 EFLAGS: 00010202 [ 4.289113] RAX: 0000000000000018 RBX: ffff9c5807d77c18 RCX: 0000000000000000 [ 4.296417] RDX: ffffa9520004bb88 RSI: 0000000000000cc0 RDI: 0000000000000001 [ 4.303719] RBP: 0000000000000001 R08: 0000000000000000 R09: ffffa9520004baf0 [ 4.311022] R10: ffffffffffffffff R11: 000000000000000b R12: 0000000000000cc0 [ 4.318323] R13: ffff9c5807d77c18 R14: ffff9c5807d77c18 R15: 000000000000a710 [ 4.325625] FS: 0000000000000000(0000) GS:ffff9c583e200000(0000) knlGS:0000000000000000 [ 4.333907] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.339793] CR2: 0000000000000001 CR3: 000000001540c000 CR4: 00000000001006f0 [ 4.347098] Call Trace: [ 4.349611] <TASK> [ 4.351771] kstrdup_const+0x2d/0x70 [ 4.355453] kobject_set_name_vargs+0x1e/0x90 [ 4.359939] dev_set_name+0x4e/0x70 [ 4.363534] device_add+0x5b/0x960 [ 4.367036] ? complete_all+0x1b/0x80 [ 4.370808] ulpi_register_interface+0x213/0x290 [ 4.375551] dwc3_ulpi_init+0x17/0x40 [ 4.379314] dwc3_core_init+0xc29/0x1b70 [ 4.383350] dwc3_probe+0x115a/0x1900 [ 4.387122] platform_probe+0x3a/0xa0 [ 4.390892] really_probe+0xdc/0x390 [ 4.394567] ? pm_runtime_barrier+0x4b/0x80 [ 4.398867] __driver_probe_device+0x73/0x170 [ 4.403340] driver_probe_device+0x19/0x90 [ 4.407545] __device_attach_driver+0x80/0x110 [ 4.412105] ? driver_allows_async_probing+0x60/0x60 [ 4.417195] ? driver_allows_async_probing+0x60/0x60 [ 4.422286] bus_for_each_drv+0x79/0xc0 [ 4.426230] __device_attach+0xb7/0x210 [ 4.430169] bus_probe_device+0x89/0xa0 [ 4.434111] deferred_probe_work_func+0x85/0xe0 [ 4.438762] process_one_work+0x1d7/0x3a0 [ 4.440890] EXT4-fs (mmcblk1): recovery complete [ 4.442785] worker_thread+0x48/0x3c0 [ 4.442809] ? rescuer_thread+0x380/0x380 [ 4.442828] kthread+0xe2/0x110 [ 4.447489] EXT4-fs (mmcblk1): mounted filesystem with ordered data mode. Quota mode: none. [ 4.451086] ? kthread_complete_and_exit+0x20/0x20 [ 4.451107] ret_from_fork+0x22/0x30 [ 4.475811] </TASK> [ 4.478052] Modules linked in: mmc_block extcon_intel_mrfld sdhci_pci cqhci sdhci led_class mmc_core intel_soc_pmic_mrfld btrfs libcrc32c xor zlib_deflate raid6_pq zstd_compress [ 4.494312] CR2: 0000000000000001 [ 4.497717] ---[ end trace 0000000000000000 ]---
This revert breaks USB on the Lenovo Yoga C630. sudo cat /sys/kernel/debug/devices_deferred a800000.usb platform: supplier 88e3000.phy not ready 88e2000.phy 88e3000.phy a600000.usb platform: supplier 88e2000.phy not ready Nothing seems to indicate there is any breakage in the dmesg output though, just that deferred probe is pending. qcom-qmp-usb-phy 88e9000.phy: Registered Qcom-QMP phy qcom-qmp-usb-phy 88eb000.phy: Registered Qcom-QMP phy platform a800000.usb: deferred probe pending platform a600000.usb: deferred probe pending -- steev
On Sun, Oct 16, 2022 at 1:59 PM Ferry Toth <fntoth@gmail.com> wrote: > > > Op 15-10-2022 om 21:54 schreef Andrey Smirnov: > > On Thu, Oct 13, 2022 at 12:35 PM Ferry Toth <fntoth@gmail.com> wrote: > >> <SNIP> > >>> My end goal here is to find a way to test vanilla v6.0 with the two > >>> patches reverted on your end. I thought that during my testing I saw > >>> tusb1210 print those timeout messages during its probe and that > >>> disabling the driver worked to break the loop, but I went back to > >>> double check and it doesn't work so scratch that idea. Configuring > >>> extcon as a built-in breaks host functionality with or without patches > >>> on my end, so I'm not sure it could be a path. > >>> > >>> I won't have time to try things with > >>> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch until > >>> the weekend, meanwhile can you give this diff a try with vanilla (no > >>> reverts) v6.0: > >>> > > OK, got a chance to try things with that patch. Both v6.0 and v6.0 > > with my patches reverted work the same, my Kingston DataTraveller USB > > stick enumerates and works as expected. > > > Iow you don't need the patch at all to get usb to work. There has got to > be a difference in our configs. > My patch? Yeah, it should have zero effect on anything. !DWC3_VER_IS_PRIOR(DWC3, 330A) is false for Merrifield, so the logical change from my patch is a no-op. It's a pure coincidence that it resolved the probe loop that 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch is for. > Did you have a chance to look at mine (here: > https://drive.google.com/file/d/1aKJWMqiAXnReeLCvxshzjKwGxIWQ7eJk/view?usp=sharing) > > Else, send me yours. > I've been using your config in all of the testing. > >>> modified drivers/phy/ti/phy-tusb1210.c > >>> @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, enum > >>> phy_mode mode, int submode) > >>> u8 reg; > >>> > >>> ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); > >>> + WARN_ON(ret < 0); > >>> if (ret < 0) > >>> return ret; > >>> > >>> @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, > >>> enum phy_mode mode, int submode) > >>> } > >>> > >>> tusb->otg_ctrl = reg; > >>> - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); > >>> + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); > >>> + WARN_ON(ret < 0); > >>> + return ret; > >>> + > >>> } > >>> > >>> #ifdef CONFIG_POWER_SUPPLY > >>> > >>> ? I'm curious to see if there's masked errors on your end since dwc3 > >>> driver doesn't check for those. > >> root@yuna:~# dmesg | grep -i -E 'warn|assert|error|tusb|dwc3' > >> 8250_mid: probe of 0000:00:04.0 failed with error -16 > >> platform regulatory.0: Direct firmware load for regulatory.db failed > >> with error -2 > >> brcmfmac mmc2:0001:1: Direct firmware load for > >> brcm/brcmfmac43340-sdio.Intel Corporation-Merrifield.bin failed with > >> error -2 > >> sof-audio-pci-intel-tng 0000:00:0d.0: error: I/O region is too small. > >> sof-audio-pci-intel-tng 0000:00:0d.0: error: failed to probe DSP -19 > >> > >> > >>>> This is done through configfs only when the switch is set to device mode. > >>> Sure, but can it be disabled? We are looking for unknown variables, so > >>> excluding this would be a reasonable thing to do. > >> It's not enabled until I flip the switch to device mode. > > OK to cut this back and forth short, I think it'd be easier to just > > ask you to run what I run. Here's vanilla v6.0 bzImage and initrd > > (built with your config + CONFIG_PHY_TUSB1210=y) I tested with > > What do you mean by this? My config is with > > CONFIG_GENERIC_PHY=y > CONFIG_PHY_TUSB1210=y > $ cat config-6.0.0-edison-acpi-standard | grep 1210 # CONFIG_PHY_TUSB1210 is not set $ md5sum config-6.0.0-edison-acpi-standard 3c989c708302c1f9e73c6113e71aed9d config-6.0.0-edison-acpi-standard I had to manually enable it, that's what I meant by my comment. > > https://drive.google.com/drive/folders/1H28AL1coPPZ2kLTYskDuDdWo-oE7DRPH?usp=sharing > > let's see how it behaves on your setup. There's also the U-Boot binary > > Ok, it's getting weirder and weirder. The following is with my U-Boot > and your kernel/initrd > > 1) I placed them in /boot which is on my btrfs partition on the emmc (my > U-Boot has btrfs enabled) > > Linux kernel version 6.0.0-edison-acpi-standard > (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 > UTC 2022 > Building boot_params at 0x00090000 > Loading bzImage at address 100000 (12064480 bytes) > Initial RAM disk at linear address 0x06000000, size 25165824 bytes > Kernel command line: "quiet root=/dev/mmcblk0p8 > rootflags=subvol=@,compress=lzo rootfstype=btrfs console=ttyS2,115200n8 > earlyprintk=ttyS2,115200n8,keep loglevel=4 systemd.unit=multi-user.target" > Kernel loaded at 00100000, setup_base=00090000 > You shouldn't be using root from you storage since: a) the initrd I uploaded is self-containing, it doesn't need anything else b) your local data is another variable we don't want to introduce just "rootfstype=ramfs" should be enough for this and root=/dev/mmcblk0p8 rootflags=subvol=@,compress=lzo rootfstype=btrfs should be dropped. > Usb drive is not detected regardless booting with stick plugged or > plugging later on. > > # lsusb > Bus 001 Device 001: ID 1d6b:0002 > Bus 002 Device 001: ID 1d6b:0003 > > No TUSB1210 probed > > # dmesg | grep dwc3 > # > > 2) I placed them in my vfat rescue partition > > Linux kernel version 6.0.0-edison-acpi-standard > (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 > UTC 2022 > Building boot_params at 0x00090000 > Loading bzImage at address 100000 (12064480 bytes) > Initial RAM disk at linear address 0x06000000, size 25165824 bytes > Kernel command line: "debugshell=0 tty1 console=ttyS2,115200n8 > root=/dev/mmcblk0p7 rootfstype=vfat systemd.unit=multi-user.target" > Kernel loaded at 00100000, setup_base=00090000 > > Usb drive is detected. Yep, that's exactly my point about extra variables. So it looks like something in your root btrfs partition is triggering this issue. I don't really know the contents of your root file system, so don't really have any suggestions there. Maybe old kernel modules are getting picked up? Or something else is interfering ¯\_(ツ)_/¯ > > # lsusb > Bus 001 Device 001: ID 1d6b:0002 > Bus 001 Device 002: ID 125f:312b > Bus 002 Device 001: ID 1d6b:0003 > > TUSB1210 probed > > # dmesg | grep dwc3 > [ 8.605845] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer reset > [ 8.605876] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup > [ 8.605927] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO > lookup > [ 8.605941] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer reset found > [ 8.605956] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer cs > [ 8.605970] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup > [ 8.606011] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO > lookup > [ 8.606024] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer cs found > [ 8.669317] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to > reg 0x80 > > ## note: options debugshell, root and rootfstype are normally handled by > a script in my initrd, so I guess here noop. > > > I use in that folder in case you want to give it a try. > > > > Now on Merrifield dwc3_get_extcon() doesn't do anything but call > > extcon_get_extcon_dev() which doesn't touch any hardware or interact > > with other drivers, so assuming > > > >> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - > >> dwc3_core_init - .. - dwc3_core_init_mode (not working) > >> > >> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - > >> .. - dwc3_core_init_mode (no change) > >> > >> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - > >> dwc3_core_init_mode (works) > > still holds(did you double check that with vanilla v6.0?) the only > I didn't check > > difference that I can see is execution timings. It seems to me it's > > either an extra delay added by execution of extcon_get_extcon_dev() > > (unlikely) or multiple partial probes that include dwc3_core_init() > > that change things. You can try to check the latter by adding an > > artificial probe deferral point after dwc3_core_init(). Something like > > (didn't test this): > > > > modified drivers/usb/dwc3/core.c > > @@ -1860,6 +1860,10 @@ static int dwc3_probe(struct platform_device *pdev) > > goto err3; > > > > ret = dwc3_core_init(dwc); > > + static int deferral_counter = 0; > > + if (deferral_counter++ < 9) /* I counted 9 deferrals in my testing */ > > + ret = -EPROBE_DEFER; > > + > > if (ret) { > > dev_err_probe(dev, ret, "failed to initialize core\n"); > > goto err4; > > Not sure how you wanted this tested. So I assume on vanilla booting from > btrfs on eemc. It crashes but maybe the trace is usefull. After crash it > continues but no USB appears at all. > I think you'll have to experiment with that code placement to emulate a deferred probe for the old location of "get extcon". I'd focus on figuring out the root filesystem variable first before trying to get this to work. To be explicit, at this point I don't think the revert is really warranted. I'm also happy to reply/help you with suggestions, but you are going to have to start driving this.
Hi, Op 17-10-2022 om 23:20 schreef Andrey Smirnov: > On Sun, Oct 16, 2022 at 1:59 PM Ferry Toth <fntoth@gmail.com> wrote: >> >> Op 15-10-2022 om 21:54 schreef Andrey Smirnov: >>> On Thu, Oct 13, 2022 at 12:35 PM Ferry Toth <fntoth@gmail.com> wrote: >>>> <SNIP> >>>>> My end goal here is to find a way to test vanilla v6.0 with the two >>>>> patches reverted on your end. I thought that during my testing I saw >>>>> tusb1210 print those timeout messages during its probe and that >>>>> disabling the driver worked to break the loop, but I went back to >>>>> double check and it doesn't work so scratch that idea. Configuring >>>>> extcon as a built-in breaks host functionality with or without patches >>>>> on my end, so I'm not sure it could be a path. >>>>> >>>>> I won't have time to try things with >>>>> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch until >>>>> the weekend, meanwhile can you give this diff a try with vanilla (no >>>>> reverts) v6.0: >>>>> >>> OK, got a chance to try things with that patch. Both v6.0 and v6.0 >>> with my patches reverted work the same, my Kingston DataTraveller USB >>> stick enumerates and works as expected. >>> >> Iow you don't need the patch at all to get usb to work. There has got to >> be a difference in our configs. >> > My patch? Yeah, it should have zero effect on anything. > !DWC3_VER_IS_PRIOR(DWC3, 330A) is false for Merrifield, so the logical > change from my patch is a no-op. It's a pure coincidence that it > resolved the probe loop that > 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch is > for. > >> Did you have a chance to look at mine (here: >> https://drive.google.com/file/d/1aKJWMqiAXnReeLCvxshzjKwGxIWQ7eJk/view?usp=sharing) >> >> Else, send me yours. >> > I've been using your config in all of the testing. > >>>>> modified drivers/phy/ti/phy-tusb1210.c >>>>> @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, enum >>>>> phy_mode mode, int submode) >>>>> u8 reg; >>>>> >>>>> ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); >>>>> + WARN_ON(ret < 0); >>>>> if (ret < 0) >>>>> return ret; >>>>> >>>>> @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, >>>>> enum phy_mode mode, int submode) >>>>> } >>>>> >>>>> tusb->otg_ctrl = reg; >>>>> - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); >>>>> + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); >>>>> + WARN_ON(ret < 0); >>>>> + return ret; >>>>> + >>>>> } >>>>> >>>>> #ifdef CONFIG_POWER_SUPPLY >>>>> >>>>> ? I'm curious to see if there's masked errors on your end since dwc3 >>>>> driver doesn't check for those. >>>> root@yuna:~# dmesg | grep -i -E 'warn|assert|error|tusb|dwc3' >>>> 8250_mid: probe of 0000:00:04.0 failed with error -16 >>>> platform regulatory.0: Direct firmware load for regulatory.db failed >>>> with error -2 >>>> brcmfmac mmc2:0001:1: Direct firmware load for >>>> brcm/brcmfmac43340-sdio.Intel Corporation-Merrifield.bin failed with >>>> error -2 >>>> sof-audio-pci-intel-tng 0000:00:0d.0: error: I/O region is too small. >>>> sof-audio-pci-intel-tng 0000:00:0d.0: error: failed to probe DSP -19 >>>> >>>> >>>>>> This is done through configfs only when the switch is set to device mode. >>>>> Sure, but can it be disabled? We are looking for unknown variables, so >>>>> excluding this would be a reasonable thing to do. >>>> It's not enabled until I flip the switch to device mode. >>> OK to cut this back and forth short, I think it'd be easier to just >>> ask you to run what I run. Here's vanilla v6.0 bzImage and initrd >>> (built with your config + CONFIG_PHY_TUSB1210=y) I tested with >> What do you mean by this? My config is with >> >> CONFIG_GENERIC_PHY=y >> CONFIG_PHY_TUSB1210=y >> > $ cat config-6.0.0-edison-acpi-standard | grep 1210 > # CONFIG_PHY_TUSB1210 is not set > $ md5sum config-6.0.0-edison-acpi-standard > 3c989c708302c1f9e73c6113e71aed9d config-6.0.0-edison-acpi-standard > > I had to manually enable it, that's what I meant by my comment. Unbelievable, seems I uploaded the wrong config. I just double checked to see if any other differences: scripts/diffconfig config-6.0.0-edison-acpi-standard-bad config-6.0.0-edison-acpi-standard-good GENERIC_PHY n -> y PHY_TUSB1210 n -> y > >>> https://drive.google.com/drive/folders/1H28AL1coPPZ2kLTYskDuDdWo-oE7DRPH?usp=sharing >>> let's see how it behaves on your setup. There's also the U-Boot binary >> Ok, it's getting weirder and weirder. The following is with my U-Boot >> and your kernel/initrd >> >> 1) I placed them in /boot which is on my btrfs partition on the emmc (my >> U-Boot has btrfs enabled) >> >> Linux kernel version 6.0.0-edison-acpi-standard >> (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 >> UTC 2022 >> Building boot_params at 0x00090000 >> Loading bzImage at address 100000 (12064480 bytes) >> Initial RAM disk at linear address 0x06000000, size 25165824 bytes >> Kernel command line: "quiet root=/dev/mmcblk0p8 >> rootflags=subvol=@,compress=lzo rootfstype=btrfs console=ttyS2,115200n8 >> earlyprintk=ttyS2,115200n8,keep loglevel=4 systemd.unit=multi-user.target" >> Kernel loaded at 00100000, setup_base=00090000 >> > You shouldn't be using root from you storage since: > a) the initrd I uploaded is self-containing, it doesn't need anything else Yes I know. With the Yocto image we build our own that does switchroot. Here I am inside your buildroot initrd, no fs from the emmc are mounted. According to dmesg btrfs module is loaded later then dwc3, and scans (finds) the btrfs partition in all cases without mounting. > b) your local data is another variable we don't want to introduce > > just "rootfstype=ramfs" should be enough for this and > > root=/dev/mmcblk0p8 rootflags=subvol=@,compress=lzo rootfstype=btrfs > > should be dropped. After some experimenting it appears "rootfstype=btrfs" causes the buildroot rootfs to fail probing tsub1210. I think you should be able to reproduce this. However, changing "rootfstype=ramfs" for my (yocto) image (which probably should be the right thing to do now I think about it) does not resolve the failing to probe tsub1210. Comparing the dmesg with the buildroot one shows that in my case a lot of stuff happens prior to dwc3: raid6 does speed testing (this is used by btrfs) btrfs is loaded sdhci probed acpi tables (for edison-arduino) loaded into configfs external gpio muxes setup finally xhci (tusb1210 is before this on the buildroot image) > >> Usb drive is not detected regardless booting with stick plugged or >> plugging later on. >> >> # lsusb >> Bus 001 Device 001: ID 1d6b:0002 >> Bus 002 Device 001: ID 1d6b:0003 >> >> No TUSB1210 probed >> >> # dmesg | grep dwc3 >> # >> >> 2) I placed them in my vfat rescue partition >> >> Linux kernel version 6.0.0-edison-acpi-standard >> (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 >> UTC 2022 >> Building boot_params at 0x00090000 >> Loading bzImage at address 100000 (12064480 bytes) >> Initial RAM disk at linear address 0x06000000, size 25165824 bytes >> Kernel command line: "debugshell=0 tty1 console=ttyS2,115200n8 >> root=/dev/mmcblk0p7 rootfstype=vfat systemd.unit=multi-user.target" >> Kernel loaded at 00100000, setup_base=00090000 >> >> Usb drive is detected. > Yep, that's exactly my point about extra variables. So it looks like > something in your root btrfs partition is triggering this issue. I > don't really know the contents of your root file system, so don't > really have any suggestions there. Maybe old kernel modules are > getting picked up? Or something else is interfering ¯\_(ツ)_/¯ > >> # lsusb >> Bus 001 Device 001: ID 1d6b:0002 >> Bus 001 Device 002: ID 125f:312b >> Bus 002 Device 001: ID 1d6b:0003 >> >> TUSB1210 probed >> >> # dmesg | grep dwc3 >> [ 8.605845] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer reset >> [ 8.605876] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup >> [ 8.605927] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO >> lookup >> [ 8.605941] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer reset found >> [ 8.605956] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer cs >> [ 8.605970] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup >> [ 8.606011] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO >> lookup >> [ 8.606024] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer cs found >> [ 8.669317] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to >> reg 0x80 >> >> ## note: options debugshell, root and rootfstype are normally handled by >> a script in my initrd, so I guess here noop. >> >>> I use in that folder in case you want to give it a try. >>> >>> Now on Merrifield dwc3_get_extcon() doesn't do anything but call >>> extcon_get_extcon_dev() which doesn't touch any hardware or interact >>> with other drivers, so assuming >>> >>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>> >>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - dwc3_core_init - >>>> .. - dwc3_core_init_mode (no change) >>>> >>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - dwc3_get_extcon - >>>> dwc3_core_init_mode (works) >>> still holds(did you double check that with vanilla v6.0?) the only >> I didn't check >>> difference that I can see is execution timings. It seems to me it's >>> either an extra delay added by execution of extcon_get_extcon_dev() >>> (unlikely) or multiple partial probes that include dwc3_core_init() >>> that change things. You can try to check the latter by adding an >>> artificial probe deferral point after dwc3_core_init(). Something like >>> (didn't test this): >>> >>> modified drivers/usb/dwc3/core.c >>> @@ -1860,6 +1860,10 @@ static int dwc3_probe(struct platform_device *pdev) >>> goto err3; >>> >>> ret = dwc3_core_init(dwc); >>> + static int deferral_counter = 0; >>> + if (deferral_counter++ < 9) /* I counted 9 deferrals in my testing */ >>> + ret = -EPROBE_DEFER; >>> + >>> if (ret) { >>> dev_err_probe(dev, ret, "failed to initialize core\n"); >>> goto err4; >> Not sure how you wanted this tested. So I assume on vanilla booting from >> btrfs on eemc. It crashes but maybe the trace is usefull. After crash it >> continues but no USB appears at all. >> > I think you'll have to experiment with that code placement to emulate > a deferred probe for the old location of "get extcon". I'd focus on > figuring out the root filesystem variable first before trying to get > this to work. Yes, did that as described above. I think that "rootfstype=btrfs" causes some ordering issue, like as if xhci goes to soon. It goes before: spi_master spi5: GPIO lookup for consumer cs while tusb1210 when it does probe starts with: tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer reset and xhci follow later. > To be explicit, at this point I don't think the revert is really > warranted. I'm also happy to reply/help you with suggestions, but you > are going to have to start driving this. I agree that reverting based on a "regression" can not be concluded here as dwc3 on merrifield never worked without an out-of-tree patch. And your patch makes that out-of-tree patch obsolete - that's a good thing. But I do think your patch is exposing an older issue that makes dwc3 sensitive to ordering. I would very much appreciate if you could try "rootfstype=btrfs" to reproduce. It think it would be a good thing to resolve it so that the effort here has not been for nothing. My next step will be to move around the code placement as you suggest. (I can spend a few hours in the evenings only as this is not my day job, so explains if I'm a bit slow to respond here).
Op 18-10-2022 om 22:47 schreef Ferry Toth: > Hi, > > Op 17-10-2022 om 23:20 schreef Andrey Smirnov: >> On Sun, Oct 16, 2022 at 1:59 PM Ferry Toth <fntoth@gmail.com> wrote: >>> >>> Op 15-10-2022 om 21:54 schreef Andrey Smirnov: >>>> On Thu, Oct 13, 2022 at 12:35 PM Ferry Toth <fntoth@gmail.com> wrote: >>>>> <SNIP> >>>>>> My end goal here is to find a way to test vanilla v6.0 with the two >>>>>> patches reverted on your end. I thought that during my testing I saw >>>>>> tusb1210 print those timeout messages during its probe and that >>>>>> disabling the driver worked to break the loop, but I went back to >>>>>> double check and it doesn't work so scratch that idea. Configuring >>>>>> extcon as a built-in breaks host functionality with or without >>>>>> patches >>>>>> on my end, so I'm not sure it could be a path. >>>>>> >>>>>> I won't have time to try things with >>>>>> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch >>>>>> until >>>>>> the weekend, meanwhile can you give this diff a try with vanilla (no >>>>>> reverts) v6.0: >>>>>> >>>> OK, got a chance to try things with that patch. Both v6.0 and v6.0 >>>> with my patches reverted work the same, my Kingston DataTraveller USB >>>> stick enumerates and works as expected. >>>> >>> Iow you don't need the patch at all to get usb to work. There has got to >>> be a difference in our configs. >>> >> My patch? Yeah, it should have zero effect on anything. >> !DWC3_VER_IS_PRIOR(DWC3, 330A) is false for Merrifield, so the logical >> change from my patch is a no-op. It's a pure coincidence that it >> resolved the probe loop that >> 0043b-TODO-driver-core-Break-infinite-loop-when-deferred-p.patch is >> for. >> >>> Did you have a chance to look at mine (here: >>> https://drive.google.com/file/d/1aKJWMqiAXnReeLCvxshzjKwGxIWQ7eJk/view?usp=sharing) >>> >>> Else, send me yours. >>> >> I've been using your config in all of the testing. >> >>>>>> modified drivers/phy/ti/phy-tusb1210.c >>>>>> @@ -127,6 +127,7 @@ static int tusb1210_set_mode(struct phy *phy, >>>>>> enum >>>>>> phy_mode mode, int submode) >>>>>> u8 reg; >>>>>> >>>>>> ret = tusb1210_ulpi_read(tusb, ULPI_OTG_CTRL, ®); >>>>>> + WARN_ON(ret < 0); >>>>>> if (ret < 0) >>>>>> return ret; >>>>>> >>>>>> @@ -152,7 +153,10 @@ static int tusb1210_set_mode(struct phy *phy, >>>>>> enum phy_mode mode, int submode) >>>>>> } >>>>>> >>>>>> tusb->otg_ctrl = reg; >>>>>> - return tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); >>>>>> + ret = tusb1210_ulpi_write(tusb, ULPI_OTG_CTRL, reg); >>>>>> + WARN_ON(ret < 0); >>>>>> + return ret; >>>>>> + >>>>>> } >>>>>> >>>>>> #ifdef CONFIG_POWER_SUPPLY >>>>>> >>>>>> ? I'm curious to see if there's masked errors on your end since dwc3 >>>>>> driver doesn't check for those. >>>>> root@yuna:~# dmesg | grep -i -E 'warn|assert|error|tusb|dwc3' >>>>> 8250_mid: probe of 0000:00:04.0 failed with error -16 >>>>> platform regulatory.0: Direct firmware load for regulatory.db failed >>>>> with error -2 >>>>> brcmfmac mmc2:0001:1: Direct firmware load for >>>>> brcm/brcmfmac43340-sdio.Intel Corporation-Merrifield.bin failed with >>>>> error -2 >>>>> sof-audio-pci-intel-tng 0000:00:0d.0: error: I/O region is too small. >>>>> sof-audio-pci-intel-tng 0000:00:0d.0: error: failed to probe DSP -19 >>>>> >>>>> >>>>>>> This is done through configfs only when the switch is set to >>>>>>> device mode. >>>>>> Sure, but can it be disabled? We are looking for unknown >>>>>> variables, so >>>>>> excluding this would be a reasonable thing to do. >>>>> It's not enabled until I flip the switch to device mode. >>>> OK to cut this back and forth short, I think it'd be easier to just >>>> ask you to run what I run. Here's vanilla v6.0 bzImage and initrd >>>> (built with your config + CONFIG_PHY_TUSB1210=y) I tested with >>> What do you mean by this? My config is with >>> >>> CONFIG_GENERIC_PHY=y >>> CONFIG_PHY_TUSB1210=y >>> >> $ cat config-6.0.0-edison-acpi-standard | grep 1210 >> # CONFIG_PHY_TUSB1210 is not set >> $ md5sum config-6.0.0-edison-acpi-standard >> 3c989c708302c1f9e73c6113e71aed9d config-6.0.0-edison-acpi-standard >> >> I had to manually enable it, that's what I meant by my comment. > > Unbelievable, seems I uploaded the wrong config. I just double checked > to see if any other differences: > > scripts/diffconfig config-6.0.0-edison-acpi-standard-bad > config-6.0.0-edison-acpi-standard-good > GENERIC_PHY n -> y > PHY_TUSB1210 n -> y > >> >>>> https://drive.google.com/drive/folders/1H28AL1coPPZ2kLTYskDuDdWo-oE7DRPH?usp=sharing >>>> let's see how it behaves on your setup. There's also the U-Boot binary >>> Ok, it's getting weirder and weirder. The following is with my U-Boot >>> and your kernel/initrd >>> >>> 1) I placed them in /boot which is on my btrfs partition on the emmc (my >>> U-Boot has btrfs enabled) >>> >>> Linux kernel version 6.0.0-edison-acpi-standard >>> (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 >>> UTC 2022 >>> Building boot_params at 0x00090000 >>> Loading bzImage at address 100000 (12064480 bytes) >>> Initial RAM disk at linear address 0x06000000, size 25165824 bytes >>> Kernel command line: "quiet root=/dev/mmcblk0p8 >>> rootflags=subvol=@,compress=lzo rootfstype=btrfs console=ttyS2,115200n8 >>> earlyprintk=ttyS2,115200n8,keep loglevel=4 >>> systemd.unit=multi-user.target" >>> Kernel loaded at 00100000, setup_base=00090000 >>> >> You shouldn't be using root from you storage since: >> a) the initrd I uploaded is self-containing, it doesn't need >> anything else > > Yes I know. With the Yocto image we build our own that does switchroot. > > Here I am inside your buildroot initrd, no fs from the emmc are mounted. > According to dmesg btrfs module is loaded later then dwc3, and scans > (finds) the btrfs partition in all cases without mounting. > >> b) your local data is another variable we don't want to introduce >> >> just "rootfstype=ramfs" should be enough for this and >> >> root=/dev/mmcblk0p8 rootflags=subvol=@,compress=lzo rootfstype=btrfs >> >> should be dropped. > > After some experimenting it appears "rootfstype=btrfs" causes the > buildroot rootfs to fail probing tsub1210. > > I think you should be able to reproduce this. > > However, changing "rootfstype=ramfs" for my (yocto) image (which > probably should be the right thing to do now I think about it) does not > resolve the failing to probe tsub1210. Comparing the dmesg with the > buildroot one shows that in my case a lot of stuff happens prior to dwc3: > > raid6 does speed testing (this is used by btrfs) > > btrfs is loaded > > sdhci probed > > acpi tables (for edison-arduino) loaded into configfs > > external gpio muxes setup > > finally xhci (tusb1210 is before this on the buildroot image) > > >> >>> Usb drive is not detected regardless booting with stick plugged or >>> plugging later on. >>> >>> # lsusb >>> Bus 001 Device 001: ID 1d6b:0002 >>> Bus 002 Device 001: ID 1d6b:0003 >>> >>> No TUSB1210 probed >>> >>> # dmesg | grep dwc3 >>> # >>> >>> 2) I placed them in my vfat rescue partition >>> >>> Linux kernel version 6.0.0-edison-acpi-standard >>> (andreysm@neptunefw-builder) #8 SMP PREEMPT_DYNAMIC Sat Oct 15 18:47:19 >>> UTC 2022 >>> Building boot_params at 0x00090000 >>> Loading bzImage at address 100000 (12064480 bytes) >>> Initial RAM disk at linear address 0x06000000, size 25165824 bytes >>> Kernel command line: "debugshell=0 tty1 console=ttyS2,115200n8 >>> root=/dev/mmcblk0p7 rootfstype=vfat systemd.unit=multi-user.target" >>> Kernel loaded at 00100000, setup_base=00090000 >>> >>> Usb drive is detected. >> Yep, that's exactly my point about extra variables. So it looks like >> something in your root btrfs partition is triggering this issue. I >> don't really know the contents of your root file system, so don't >> really have any suggestions there. Maybe old kernel modules are >> getting picked up? Or something else is interfering ¯\_(ツ)_/¯ >> >>> # lsusb >>> Bus 001 Device 001: ID 1d6b:0002 >>> Bus 001 Device 002: ID 125f:312b >>> Bus 002 Device 001: ID 1d6b:0003 >>> >>> TUSB1210 probed >>> >>> # dmesg | grep dwc3 >>> [ 8.605845] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer reset >>> [ 8.605876] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup >>> [ 8.605927] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO >>> lookup >>> [ 8.605941] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer reset found >>> [ 8.605956] tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer cs >>> [ 8.605970] tusb1210 dwc3.0.auto.ulpi: using ACPI for GPIO lookup >>> [ 8.606011] tusb1210 dwc3.0.auto.ulpi: using lookup tables for GPIO >>> lookup >>> [ 8.606024] tusb1210 dwc3.0.auto.ulpi: No GPIO consumer cs found >>> [ 8.669317] tusb1210 dwc3.0.auto.ulpi: error -110 writing val 0x41 to >>> reg 0x80 >>> >>> ## note: options debugshell, root and rootfstype are normally handled by >>> a script in my initrd, so I guess here noop. >>> >>>> I use in that folder in case you want to give it a try. >>>> >>>> Now on Merrifield dwc3_get_extcon() doesn't do anything but call >>>> extcon_get_extcon_dev() which doesn't touch any hardware or interact >>>> with other drivers, so assuming >>>> >>>>> So current v6.0 has: dwc3_get_extcon - dwc3_get_dr_mode - ... - >>>>> dwc3_core_init - .. - dwc3_core_init_mode (not working) >>>>> >>>>> I changed to: dwc3_get_dr_mode - dwc3_get_extcon - .. - >>>>> dwc3_core_init - >>>>> .. - dwc3_core_init_mode (no change) >>>>> >>>>> Then to: dwc3_get_dr_mode - .. - dwc3_core_init - .. - >>>>> dwc3_get_extcon - >>>>> dwc3_core_init_mode (works) >>>> still holds(did you double check that with vanilla v6.0?) the only >>> I didn't check >>>> difference that I can see is execution timings. It seems to me it's >>>> either an extra delay added by execution of extcon_get_extcon_dev() >>>> (unlikely) or multiple partial probes that include dwc3_core_init() >>>> that change things. You can try to check the latter by adding an >>>> artificial probe deferral point after dwc3_core_init(). Something like >>>> (didn't test this): >>>> >>>> modified drivers/usb/dwc3/core.c >>>> @@ -1860,6 +1860,10 @@ static int dwc3_probe(struct platform_device >>>> *pdev) >>>> goto err3; >>>> >>>> ret = dwc3_core_init(dwc); >>>> + static int deferral_counter = 0; >>>> + if (deferral_counter++ < 9) /* I counted 9 deferrals in my testing */ >>>> + ret = -EPROBE_DEFER; >>>> + >>>> if (ret) { >>>> dev_err_probe(dev, ret, "failed to initialize core\n"); >>>> goto err4; >>> Not sure how you wanted this tested. So I assume on vanilla booting from >>> btrfs on eemc. It crashes but maybe the trace is usefull. After crash it >>> continues but no USB appears at all. >>> >> I think you'll have to experiment with that code placement to emulate >> a deferred probe for the old location of "get extcon". I'd focus on >> figuring out the root filesystem variable first before trying to get >> this to work. > > Yes, did that as described above. I think that "rootfstype=btrfs" causes > some ordering issue, like as if xhci goes to soon. It goes before: > > spi_master spi5: GPIO lookup for consumer cs > > while tusb1210 when it does probe starts with: > > tusb1210 dwc3.0.auto.ulpi: GPIO lookup for consumer reset > > and xhci follow later. > >> To be explicit, at this point I don't think the revert is really >> warranted. I'm also happy to reply/help you with suggestions, but you >> are going to have to start driving this. > > I agree that reverting based on a "regression" can not be concluded here > as dwc3 on merrifield never worked without an out-of-tree patch. And > your patch makes that out-of-tree patch obsolete - that's a good thing. > > But I do think your patch is exposing an older issue that makes dwc3 > sensitive to ordering. I would very much appreciate if you could try > "rootfstype=btrfs" to reproduce. It think it would be a good thing to > resolve it so that the effort here has not been for nothing. > > My next step will be to move around the code placement as you suggest. > (I can spend a few hours in the evenings only as this is not my day job, > so explains if I'm a bit slow to respond here). > Here the results of moving the code around the same as before: --- drivers/usb/dwc3/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index d0237b30c9be..990de5bf1983 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1844,13 +1844,6 @@ static int dwc3_probe(struct platform_device *pdev) goto err2; } - dwc->edev = dwc3_get_extcon(dwc); - if (IS_ERR(dwc->edev)) { - ret = PTR_ERR(dwc->edev); - dev_err_probe(dwc->dev, ret, "failed to get extcon\n"); - goto err3; - } - ret = dwc3_get_dr_mode(dwc); if (ret) goto err3; @@ -1868,6 +1861,13 @@ static int dwc3_probe(struct platform_device *pdev) dwc3_check_params(dwc); dwc3_debugfs_init(dwc); + dwc->edev = dwc3_get_extcon(dwc); + if (IS_ERR(dwc->edev)) { + ret = PTR_ERR(dwc->edev); + dev_err_probe(dwc->dev, ret, "failed to get extcon\n"); + goto err5; + } + ret = dwc3_core_init_mode(dwc); if (ret) goto err5;
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c2b463469d51..219d797e2230 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -23,7 +23,6 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/of.h> -#include <linux/of_graph.h> #include <linux/acpi.h> #include <linux/pinctrl/consumer.h> #include <linux/reset.h> @@ -86,7 +85,7 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc) * mode. If the controller supports DRD but the dr_mode is not * specified or set to OTG, then set the mode to peripheral. */ - if (mode == USB_DR_MODE_OTG && !dwc->edev && + if (mode == USB_DR_MODE_OTG && (!IS_ENABLED(CONFIG_USB_ROLE_SWITCH) || !device_property_read_bool(dwc->dev, "usb-role-switch")) && !DWC3_VER_IS_PRIOR(DWC3, 330A)) @@ -1668,51 +1667,6 @@ static void dwc3_check_params(struct dwc3 *dwc) } } -static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc) -{ - struct device *dev = dwc->dev; - struct device_node *np_phy; - struct extcon_dev *edev = NULL; - const char *name; - - if (device_property_read_bool(dev, "extcon")) - return extcon_get_edev_by_phandle(dev, 0); - - /* - * Device tree platforms should get extcon via phandle. - * On ACPI platforms, we get the name from a device property. - * This device property is for kernel internal use only and - * is expected to be set by the glue code. - */ - if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) { - edev = extcon_get_extcon_dev(name); - if (!edev) - return ERR_PTR(-EPROBE_DEFER); - - return edev; - } - - /* - * Try to get an extcon device from the USB PHY controller's "port" - * node. Check if it has the "port" node first, to avoid printing the - * error message from underlying code, as it's a valid case: extcon - * device (and "port" node) may be missing in case of "usb-role-switch" - * or OTG mode. - */ - np_phy = of_parse_phandle(dev->of_node, "phys", 0); - if (of_graph_is_present(np_phy)) { - struct device_node *np_conn; - - np_conn = of_graph_get_remote_node(np_phy, -1, -1); - if (np_conn) - edev = extcon_find_edev_by_node(np_conn); - of_node_put(np_conn); - } - of_node_put(np_phy); - - return edev; -} - static int dwc3_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1849,13 +1803,6 @@ static int dwc3_probe(struct platform_device *pdev) goto err2; } - dwc->edev = dwc3_get_extcon(dwc); - if (IS_ERR(dwc->edev)) { - ret = PTR_ERR(dwc->edev); - dev_err_probe(dwc->dev, ret, "failed to get extcon\n"); - goto err3; - } - ret = dwc3_get_dr_mode(dwc); if (ret) goto err3; diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index 039bf241769a..8cad9e7d3368 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -8,6 +8,7 @@ */ #include <linux/extcon.h> +#include <linux/of_graph.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/property.h> @@ -438,6 +439,51 @@ static int dwc3_drd_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc) +{ + struct device *dev = dwc->dev; + struct device_node *np_phy; + struct extcon_dev *edev = NULL; + const char *name; + + if (device_property_read_bool(dev, "extcon")) + return extcon_get_edev_by_phandle(dev, 0); + + /* + * Device tree platforms should get extcon via phandle. + * On ACPI platforms, we get the name from a device property. + * This device property is for kernel internal use only and + * is expected to be set by the glue code. + */ + if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) { + edev = extcon_get_extcon_dev(name); + if (!edev) + return ERR_PTR(-EPROBE_DEFER); + + return edev; + } + + /* + * Try to get an extcon device from the USB PHY controller's "port" + * node. Check if it has the "port" node first, to avoid printing the + * error message from underlying code, as it's a valid case: extcon + * device (and "port" node) may be missing in case of "usb-role-switch" + * or OTG mode. + */ + np_phy = of_parse_phandle(dev->of_node, "phys", 0); + if (of_graph_is_present(np_phy)) { + struct device_node *np_conn; + + np_conn = of_graph_get_remote_node(np_phy, -1, -1); + if (np_conn) + edev = extcon_find_edev_by_node(np_conn); + of_node_put(np_conn); + } + of_node_put(np_phy); + + return edev; +} + #if IS_ENABLED(CONFIG_USB_ROLE_SWITCH) #define ROLE_SWITCH 1 static int dwc3_usb_role_switch_set(struct usb_role_switch *sw, @@ -542,6 +588,10 @@ int dwc3_drd_init(struct dwc3 *dwc) device_property_read_bool(dwc->dev, "usb-role-switch")) return dwc3_setup_role_switch(dwc); + dwc->edev = dwc3_get_extcon(dwc); + if (IS_ERR(dwc->edev)) + return PTR_ERR(dwc->edev); + if (dwc->edev) { dwc->edev_nb.notifier_call = dwc3_drd_notifier; ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST,