diff mbox series

[v2] usb: hub: add retry routine after intr URB submit error

Message ID 20181119140208.11590-1-nsaenzjulienne@suse.de (mailing list archive)
State New, archived
Headers show
Series [v2] usb: hub: add retry routine after intr URB submit error | expand

Commit Message

Nicolas Saenz Julienne Nov. 19, 2018, 2:02 p.m. UTC
The hub sends hot-plug events to the host through its interrupt URB. The
driver takes care of completing the URB and re-submitting it. Completion
errors are handled in the hub_event() work, yet submission errors are
ignored, rendering the device unresponsive. All further events are lost.

It is fairly hard to find this issue in the wild, since you have to time
the USB hot-plug event with the URB submission failure. For instance it
could be the system running out of memory or some malfunction in the USB
controller driver. Nevertheless, it's pretty reasonable to think it'll
happen sometime. One can trigger this issue using eBPF's function
override feature (see BCC's inject.py script).

This patch adds a retry routine for the event of a submission error. The
HUB driver will try to re-submit the URB once every second until it's
successful or the HUB is disconnected.

As some USB subsystems already take care of this issue, the
implementation was inspired by usbhid/hid_core.c's.

Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>

---

v2: as per Alan's and Oliver's comments:
  - Rename timer
  - Delete the timer on disconnect
  - Don't reset the HUB or exponentially slow down the timer; use a
    fixed 1s retry period
  - Check for -ESHUTDOWN prior to kicking the timer

 drivers/usb/core/hub.c | 24 +++++++++++++++++++++++-
 drivers/usb/core/hub.h |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

Comments

Oliver Neukum Nov. 19, 2018, 2:04 p.m. UTC | #1
On Mo, 2018-11-19 at 15:02 +0100, Nicolas Saenz Julienne wrote:
> 
> +static void hub_retry_irq_urb(struct timer_list *t)
> +{
> +	struct usb_hub *hub = from_timer(hub, t, irq_urb_retry);
> +	int status;
> +
> +	if (hub->disconnected || hub->quiescing)
> +		return;
> +
> +	dev_err(hub->intfdev, "retrying int urb\n");
> +	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
> +	if (status && status != -ENODEV && status != -EPERM &&
> +	    status != -ESHUTDOWN)
> +		mod_timer(&hub->irq_urb_retry,
> +			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
> +}
> +
>  static void kick_hub_wq(struct usb_hub *hub)
>  {
>  	struct usb_interface *intf;
> @@ -713,8 +729,12 @@ static void hub_irq(struct urb *urb)
>  		return;
>  
>  	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
> -	if (status != 0 && status != -ENODEV && status != -EPERM)
> +	if (status != 0 && status != -ENODEV && status != -EPERM &&
> +	    status != -ESHUTDOWN) {
>  		dev_err(hub->intfdev, "resubmit --> %d\n", status);
> +		mod_timer(&hub->irq_urb_retry,
> +			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
> +	}
>  }
>  
>  /* USB 2.0 spec Section 11.24.2.3 */
> @@ -1268,6 +1288,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
>  	}
>  
>  	/* Stop hub_wq and related activity */
> +	del_timer_sync(&hub->irq_urb_retry);

That is a race condition. You kill the timer here, but the URB may
still be in flight. And if it fails, it will restart the error
handler. You have to introduce a flag or poison the URB.

>  	usb_kill_urb(hub->urb);
>  	if (hub->has_indicators)
>  		cancel_delayed_work_sync(&hub->leds);
> 

	Regards
		Oliver
Nicolas Saenz Julienne Nov. 19, 2018, 3:52 p.m. UTC | #2
Hi Oliver,
thanks for the reviews :).

On Mon, 2018-11-19 at 15:04 +0100, Oliver Neukum wrote:
> On Mo, 2018-11-19 at 15:02 +0100, Nicolas Saenz Julienne wrote:
> > +static void hub_retry_irq_urb(struct timer_list *t)
> > +{
> > +	struct usb_hub *hub = from_timer(hub, t, irq_urb_retry);
> > +	int status;
> > +
> > +	if (hub->disconnected || hub->quiescing)
> > +		return;
> > +
> > +	dev_err(hub->intfdev, "retrying int urb\n");
> > +	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
> > +	if (status && status != -ENODEV && status != -EPERM &&
> > +	    status != -ESHUTDOWN)
> > +		mod_timer(&hub->irq_urb_retry,
> > +			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
> > +}
> > +
> >  static void kick_hub_wq(struct usb_hub *hub)
> >  {
> >  	struct usb_interface *intf;
> > @@ -713,8 +729,12 @@ static void hub_irq(struct urb *urb)
> >  		return;
> >  
> >  	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
> > -	if (status != 0 && status != -ENODEV && status != -EPERM)
> > +	if (status != 0 && status != -ENODEV && status != -EPERM &&
> > +	    status != -ESHUTDOWN) {
> >  		dev_err(hub->intfdev, "resubmit --> %d\n", status);
> > +		mod_timer(&hub->irq_urb_retry,
> > +			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
> > +	}
> >  }
> >  
> >  /* USB 2.0 spec Section 11.24.2.3 */
> > @@ -1268,6 +1288,7 @@ static void hub_quiesce(struct usb_hub *hub,
> > enum hub_quiescing_type type)
> >  	}
> >  
> >  	/* Stop hub_wq and related activity */
> > +	del_timer_sync(&hub->irq_urb_retry);
> 
> That is a race condition. You kill the timer here, but the URB may
> still be in flight. And if it fails, it will restart the error
> handler. You have to introduce a flag or poison the URB.

I see, wouldn't checking "hub->quiescing" in hub_irq()'s submit error
path do the work? Something the likes of this:

@@ -713,8 +729,12 @@ static void hub_irq(struct urb *urb)
  		return;

 	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
-	if (status != 0 && status != -ENODEV && status != -EPERM)
+	if (status != 0 && status != -ENODEV && status != -EPERM &&
+	    status != -ESHUTDOWN && !hub->quiescing) {
		dev_err(hub->intfdev, "resubmit --> %d\n", status);
+		mod_timer(&hub->irq_urb_retry,
+			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+	}

> 
> >  	usb_kill_urb(hub->urb);
> >  	if (hub->has_indicators)
> >  		cancel_delayed_work_sync(&hub->leds);
> > 
> 
> 	Regards
> 		Oliver
> 

Also, looking at this made me realize that there is no real need to
check "hub->disconnected" in the timer's function, since
"hub->quiescing" is also set in that code path before any actual cleanup.
I'll update it in the next revision.

Regards,
Nicolas
Oliver Neukum Nov. 20, 2018, 7:18 a.m. UTC | #3
On Mo, 2018-11-19 at 16:52 +0100, Nicolas Saenz Julienne wrote:
>  
> > >  /* USB 2.0 spec Section 11.24.2.3 */
> > > @@ -1268,6 +1288,7 @@ static void hub_quiesce(struct usb_hub *hub,
> > > enum hub_quiescing_type type)
> > >  	}
> > >  
> > >  	/* Stop hub_wq and related activity */
> > > +	del_timer_sync(&hub->irq_urb_retry);
> > 
> > That is a race condition. You kill the timer here, but the URB may
> > still be in flight. And if it fails, it will restart the error
> > handler. You have to introduce a flag or poison the URB.
> 
> I see, wouldn't checking "hub->quiescing" in hub_irq()'s submit error
> path do the work? Something the likes of this:
> 
> @@ -713,8 +729,12 @@ static void hub_irq(struct urb *urb)
>   		return;
> 
>  	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
> -	if (status != 0 && status != -ENODEV && status != -EPERM)
> +	if (status != 0 && status != -ENODEV && status != -EPERM &&
> +	    status != -ESHUTDOWN && !hub->quiescing) {

The problem with doing such things is that the interrupt and the
disconnect can run on two different CPUs. Thus if you use a flag
you need to make sure they don't race and you need to get
memory ordering right.

Doable, but not easy.
There is a reason URBs can be poisoned.

	Regards
		Oliver
diff mbox series

Patch

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index c6077d582d29..937ece45ea1f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -607,6 +607,22 @@  static int hub_port_status(struct usb_hub *hub, int port1,
 				   status, change, NULL);
 }
 
+static void hub_retry_irq_urb(struct timer_list *t)
+{
+	struct usb_hub *hub = from_timer(hub, t, irq_urb_retry);
+	int status;
+
+	if (hub->disconnected || hub->quiescing)
+		return;
+
+	dev_err(hub->intfdev, "retrying int urb\n");
+	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
+	if (status && status != -ENODEV && status != -EPERM &&
+	    status != -ESHUTDOWN)
+		mod_timer(&hub->irq_urb_retry,
+			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+}
+
 static void kick_hub_wq(struct usb_hub *hub)
 {
 	struct usb_interface *intf;
@@ -713,8 +729,12 @@  static void hub_irq(struct urb *urb)
 		return;
 
 	status = usb_submit_urb(hub->urb, GFP_ATOMIC);
-	if (status != 0 && status != -ENODEV && status != -EPERM)
+	if (status != 0 && status != -ENODEV && status != -EPERM &&
+	    status != -ESHUTDOWN) {
 		dev_err(hub->intfdev, "resubmit --> %d\n", status);
+		mod_timer(&hub->irq_urb_retry,
+			  jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+	}
 }
 
 /* USB 2.0 spec Section 11.24.2.3 */
@@ -1268,6 +1288,7 @@  static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
 	}
 
 	/* Stop hub_wq and related activity */
+	del_timer_sync(&hub->irq_urb_retry);
 	usb_kill_urb(hub->urb);
 	if (hub->has_indicators)
 		cancel_delayed_work_sync(&hub->leds);
@@ -1800,6 +1821,7 @@  static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	INIT_DELAYED_WORK(&hub->leds, led_work);
 	INIT_DELAYED_WORK(&hub->init_work, NULL);
 	INIT_WORK(&hub->events, hub_event);
+	timer_setup(&hub->irq_urb_retry, hub_retry_irq_urb, 0);
 	usb_get_intf(intf);
 	usb_get_dev(hdev);
 
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 4accfb63f7dc..b0740cf5ef19 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -69,6 +69,7 @@  struct usb_hub {
 	struct delayed_work	leds;
 	struct delayed_work	init_work;
 	struct work_struct      events;
+	struct timer_list	irq_urb_retry;
 	struct usb_port		**ports;
 };