diff mbox series

usb: dwc3: Stop active transfers before halting the controller

Message ID 20200819051739.22123-1-wcheng@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series usb: dwc3: Stop active transfers before halting the controller | expand

Commit Message

Wesley Cheng Aug. 19, 2020, 5:17 a.m. UTC
In the DWC3 databook, for a device initiated disconnect, the driver is
required to send dependxfer commands for any pending transfers.
In addition, before the controller can move to the halted state, the SW
needs to acknowledge any pending events.  If the controller is not halted
properly, there is a chance the controller will continue accessing stale or
freed TRBs and buffers.

Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>

---
Verified fix by adding a check for ETIMEDOUT during the run stop call.
Shell script writing to the configfs UDC file to trigger disconnect and
connect.  Batch script to have PC execute data transfers over adb (ie adb
push)  After a few iterations, we'd run into a scenario where the
controller wasn't halted.  With the following change, no failed halts after
many iterations.
---
 drivers/usb/dwc3/ep0.c    |  2 +-
 drivers/usb/dwc3/gadget.c | 59 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 57 insertions(+), 4 deletions(-)

Comments

Felipe Balbi Aug. 19, 2020, 11:37 a.m. UTC | #1
Hi,

Wesley Cheng <wcheng@codeaurora.org> writes:
> In the DWC3 databook, for a device initiated disconnect, the driver is
> required to send dependxfer commands for any pending transfers.
> In addition, before the controller can move to the halted state, the SW
> needs to acknowledge any pending events.  If the controller is not halted
> properly, there is a chance the controller will continue accessing stale or
> freed TRBs and buffers.
>
> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>
> ---
> Verified fix by adding a check for ETIMEDOUT during the run stop call.
> Shell script writing to the configfs UDC file to trigger disconnect and
> connect.  Batch script to have PC execute data transfers over adb (ie adb
> push)  After a few iterations, we'd run into a scenario where the
> controller wasn't halted.  With the following change, no failed halts after
> many iterations.
> ---
>  drivers/usb/dwc3/ep0.c    |  2 +-
>  drivers/usb/dwc3/gadget.c | 59 +++++++++++++++++++++++++++++++++++++--
>  2 files changed, 57 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
> index 59f2e8c31bd1..456aa87e8778 100644
> --- a/drivers/usb/dwc3/ep0.c
> +++ b/drivers/usb/dwc3/ep0.c
> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>  	int				ret;
>  
>  	spin_lock_irqsave(&dwc->lock, flags);
> -	if (!dep->endpoint.desc) {
> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {

these two should be the same. If pullups are not connected, there's no
way we can have an endpoint descriptor. Did you find a race condition here?

> @@ -1926,6 +1926,24 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>  	return 0;
>  }
>  
> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
> +{
> +	u32 epnum;
> +
> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
> +		struct dwc3_ep *dep;
> +
> +		dep = dwc->eps[epnum];
> +		if (!dep)
> +			continue;
> +
> +		if (!(dep->flags & DWC3_EP_ENABLED))
> +			continue;
> +
> +		dwc3_remove_requests(dwc, dep);
> +	}
> +}
> +
>  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  {
>  	u32			reg;
> @@ -1950,16 +1968,37 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  
>  		dwc->pullups_connected = true;
>  	} else {
> +		dwc->pullups_connected = false;
> +
> +		__dwc3_gadget_ep_disable(dwc->eps[0]);
> +		__dwc3_gadget_ep_disable(dwc->eps[1]);
> +
> +		/*
> +		 * The databook explicitly mentions for a device-initiated
> +		 * disconnect sequence, the SW needs to ensure that it ends any
> +		 * active transfers.
> +		 */
> +		dwc3_stop_active_transfers(dwc);

IIRC, gadget driver is required to dequeue transfers before
disconnecting. My memory is a bit fuzzy in that area, but anyway, how
did you trigger this problem?

> @@ -1994,9 +2033,15 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>  		}
>  	}
>  
> +	/*
> +	 * Synchronize and disable any further event handling while controller
> +	 * is being enabled/disabled.
> +	 */
> +	disable_irq(dwc->irq_gadget);

looks like a call to synchronize_irq() would be enough here.
Wesley Cheng Aug. 19, 2020, 6:17 p.m. UTC | #2
On 8/19/2020 4:37 AM, Felipe Balbi wrote:
> 
> Hi,
> 
> Wesley Cheng <wcheng@codeaurora.org> writes:
>> In the DWC3 databook, for a device initiated disconnect, the driver is
>> required to send dependxfer commands for any pending transfers.
>> In addition, before the controller can move to the halted state, the SW
>> needs to acknowledge any pending events.  If the controller is not halted
>> properly, there is a chance the controller will continue accessing stale or
>> freed TRBs and buffers.
>>
>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>
>> ---
>> Verified fix by adding a check for ETIMEDOUT during the run stop call.
>> Shell script writing to the configfs UDC file to trigger disconnect and
>> connect.  Batch script to have PC execute data transfers over adb (ie adb
>> push)  After a few iterations, we'd run into a scenario where the
>> controller wasn't halted.  With the following change, no failed halts after
>> many iterations.
>> ---
>>  drivers/usb/dwc3/ep0.c    |  2 +-
>>  drivers/usb/dwc3/gadget.c | 59 +++++++++++++++++++++++++++++++++++++--
>>  2 files changed, 57 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>> index 59f2e8c31bd1..456aa87e8778 100644
>> --- a/drivers/usb/dwc3/ep0.c
>> +++ b/drivers/usb/dwc3/ep0.c
>> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>>  	int				ret;
>>  
>>  	spin_lock_irqsave(&dwc->lock, flags);
>> -	if (!dep->endpoint.desc) {
>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
> 
> these two should be the same. If pullups are not connected, there's no
> way we can have an endpoint descriptor. Did you find a race condition here?
> 

Hi Felipe,

At least for EP0, I don't see us clearing the EP0 desc after we set it
during dwc3_gadget_init_endpoint().  In the dwc3_gadget_ep_disable() we
only clear the desc for non control EPs:

static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
{
...
	/* Clear out the ep descriptors for non-ep0 */
	if (dep->number > 1) {
		dep->endpoint.comp_desc = NULL;
		dep->endpoint.desc = NULL;
	}

Is the desc for ep0 handled elsewhere? (checked ep0.c as well, but
couldn't find any references there)

>> @@ -1926,6 +1926,24 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>>  	return 0;
>>  }
>>  
>> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
>> +{
>> +	u32 epnum;
>> +
>> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
>> +		struct dwc3_ep *dep;
>> +
>> +		dep = dwc->eps[epnum];
>> +		if (!dep)
>> +			continue;
>> +
>> +		if (!(dep->flags & DWC3_EP_ENABLED))
>> +			continue;
>> +
>> +		dwc3_remove_requests(dwc, dep);
>> +	}
>> +}
>> +
>>  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>  {
>>  	u32			reg;
>> @@ -1950,16 +1968,37 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>  
>>  		dwc->pullups_connected = true;
>>  	} else {
>> +		dwc->pullups_connected = false;
>> +
>> +		__dwc3_gadget_ep_disable(dwc->eps[0]);
>> +		__dwc3_gadget_ep_disable(dwc->eps[1]);
>> +
>> +		/*
>> +		 * The databook explicitly mentions for a device-initiated
>> +		 * disconnect sequence, the SW needs to ensure that it ends any
>> +		 * active transfers.
>> +		 */
>> +		dwc3_stop_active_transfers(dwc);
> 
> IIRC, gadget driver is required to dequeue transfers before
> disconnecting. My memory is a bit fuzzy in that area, but anyway, how
> did you trigger this problem?
> 

I had a script that just did the following to trigger the soft disconnect:
echo "" > /sys/kernel/config/usb_gadget/g1/UDC
sleep 4
echo "a600000.dwc3" > /sys/kernel/config/usb_gadget/g1/UDC

Then on the PC, I just had a batch file executing adb push (of a large
file), in order to create the situation where there was a device
initiated disconnect while an active transfer was occurring.  After
maybe after 4-5 iterations, I saw that the controller halt failed.

[   87.364252] dwc3_gadget_run_stop run stop = 0
[   87.374168] ffs_epfile_io_complete: eshutdown
[   87.376162] __dwc3_gadget_ep_queue
[   87.386160] ffs_epfile_io_complete: eshutdown

I added some prints to hopefully show that while we are disabling the
controller, the gadget/function driver is still active.  The eshutdown
prints happen due to the dwc3_stop_active_transfers() call, which means
there are still some pending/active reqs.

Thanks
Wesley

>> @@ -1994,9 +2033,15 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>  		}
>>  	}
>>  
>> +	/*
>> +	 * Synchronize and disable any further event handling while controller
>> +	 * is being enabled/disabled.
>> +	 */
>> +	disable_irq(dwc->irq_gadget);
> 
> looks like a call to synchronize_irq() would be enough here.
>
Thinh Nguyen Aug. 19, 2020, 9:42 p.m. UTC | #3
Hi,

Wesley Cheng wrote:
> In the DWC3 databook, for a device initiated disconnect, the driver is
> required to send dependxfer commands for any pending transfers.
> In addition, before the controller can move to the halted state, the SW
> needs to acknowledge any pending events.  If the controller is not halted
> properly, there is a chance the controller will continue accessing stale or
> freed TRBs and buffers.
>
> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>
> ---
> Verified fix by adding a check for ETIMEDOUT during the run stop call.
> Shell script writing to the configfs UDC file to trigger disconnect and
> connect.  Batch script to have PC execute data transfers over adb (ie adb
> push)  After a few iterations, we'd run into a scenario where the
> controller wasn't halted.  With the following change, no failed halts after
> many iterations.
> ---
>  drivers/usb/dwc3/ep0.c    |  2 +-
>  drivers/usb/dwc3/gadget.c | 59 +++++++++++++++++++++++++++++++++++++--
>  2 files changed, 57 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
> index 59f2e8c31bd1..456aa87e8778 100644
> --- a/drivers/usb/dwc3/ep0.c
> +++ b/drivers/usb/dwc3/ep0.c
> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>  	int				ret;
>  
>  	spin_lock_irqsave(&dwc->lock, flags);
> -	if (!dep->endpoint.desc) {
> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>  				dep->name);
>  		ret = -ESHUTDOWN;
> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
> index 3ab6f118c508..1f981942d7f9 100644
> --- a/drivers/usb/dwc3/gadget.c
> +++ b/drivers/usb/dwc3/gadget.c
> @@ -1516,7 +1516,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>  {
>  	struct dwc3		*dwc = dep->dwc;
>  
> -	if (!dep->endpoint.desc) {
> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>  				dep->name);
>  		return -ESHUTDOWN;
> @@ -1926,6 +1926,24 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>  	return 0;
>  }
>  
> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
> +{
> +	u32 epnum;
> +
> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
> +		struct dwc3_ep *dep;
> +
> +		dep = dwc->eps[epnum];
> +		if (!dep)
> +			continue;
> +
> +		if (!(dep->flags & DWC3_EP_ENABLED))
> +			continue;
> +
> +		dwc3_remove_requests(dwc, dep);
> +	}
> +}
> +
>  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  {
>  	u32			reg;
> @@ -1950,16 +1968,37 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  
>  		dwc->pullups_connected = true;
>  	} else {
> +		dwc->pullups_connected = false;
> +
> +		__dwc3_gadget_ep_disable(dwc->eps[0]);
> +		__dwc3_gadget_ep_disable(dwc->eps[1]);

run_stop() function shouldn't be doing this. This is done in
dwc3_gadget_stop(). Also, if it's device-initiated disconnect, driver
needs to wait for control transfers to complete.

> +
> +		/*
> +		 * The databook explicitly mentions for a device-initiated
> +		 * disconnect sequence, the SW needs to ensure that it ends any
> +		 * active transfers.
> +		 */
> +		dwc3_stop_active_transfers(dwc);

It shouldn't be done here. Maybe move this to the dwc3_gadget_pullup()
function. The run_stop() function can be called for other context beside
this (e.g. hibernation).

> +
>  		reg &= ~DWC3_DCTL_RUN_STOP;
>  
>  		if (dwc->has_hibernation && !suspend)
>  			reg &= ~DWC3_DCTL_KEEP_CONNECT;
> -
> -		dwc->pullups_connected = false;
>  	}
>  
>  	dwc3_gadget_dctl_write_safe(dwc, reg);
>  
> +	/* Controller is not halted until pending events are acknowledged */
> +	if (!is_on) {
> +		reg = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
> +		reg &= DWC3_GEVNTCOUNT_MASK;
> +		if (reg > 0) {
> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), reg);
> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + reg) %
> +						dwc->ev_buf->length;
> +		}
> +	}
> +

Driver should handle the events before clearing the run_stop bit, not
just acknowledging and ignoring them.

>  	do {
>  		reg = dwc3_readl(dwc->regs, DWC3_DSTS);
>  		reg &= DWC3_DSTS_DEVCTRLHLT;
> @@ -1994,9 +2033,15 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>  		}
>  	}
>  
> +	/*
> +	 * Synchronize and disable any further event handling while controller
> +	 * is being enabled/disabled.
> +	 */
> +	disable_irq(dwc->irq_gadget);
>  	spin_lock_irqsave(&dwc->lock, flags);
>  	ret = dwc3_gadget_run_stop(dwc, is_on, false);
>  	spin_unlock_irqrestore(&dwc->lock, flags);
> +	enable_irq(dwc->irq_gadget);
>  
>  	return ret;
>  }
> @@ -3535,6 +3580,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
>  	if (!count)
>  		return IRQ_NONE;
>  
> +	/* Controller is halted; ignore new/pending events */
> +	if (!dwc->pullups_connected) {
> +		dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
> +		dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
> +					dwc->ev_buf->length;
> +		return IRQ_HANDLED;
> +	}
> +

Why? Are you getting any event after the controller is halted? Also,
make sure to take account of hibernation. The controller can get PMU
event after halted to bring it out of hibernation.

>  	evt->count = count;
>  	evt->flags |= DWC3_EVENT_PENDING;
>  

If you're making these fixes, can you also fix handling reset interrupt
? It also needs to end all the active transfers.

Thanks,
Thinh
Wesley Cheng Aug. 20, 2020, 1:59 a.m. UTC | #4
On 8/19/2020 2:42 PM, Thinh Nguyen wrote:
> Hi,
> 
> Wesley Cheng wrote:
>> In the DWC3 databook, for a device initiated disconnect, the driver is
>> required to send dependxfer commands for any pending transfers.
>> In addition, before the controller can move to the halted state, the SW
>> needs to acknowledge any pending events.  If the controller is not halted
>> properly, there is a chance the controller will continue accessing stale or
>> freed TRBs and buffers.
>>
>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>
>> ---
>> Verified fix by adding a check for ETIMEDOUT during the run stop call.
>> Shell script writing to the configfs UDC file to trigger disconnect and
>> connect.  Batch script to have PC execute data transfers over adb (ie adb
>> push)  After a few iterations, we'd run into a scenario where the
>> controller wasn't halted.  With the following change, no failed halts after
>> many iterations.
>> ---
>>  drivers/usb/dwc3/ep0.c    |  2 +-
>>  drivers/usb/dwc3/gadget.c | 59 +++++++++++++++++++++++++++++++++++++--
>>  2 files changed, 57 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>> index 59f2e8c31bd1..456aa87e8778 100644
>> --- a/drivers/usb/dwc3/ep0.c
>> +++ b/drivers/usb/dwc3/ep0.c
>> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>>  	int				ret;
>>  
>>  	spin_lock_irqsave(&dwc->lock, flags);
>> -	if (!dep->endpoint.desc) {
>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>>  				dep->name);
>>  		ret = -ESHUTDOWN;
>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>> index 3ab6f118c508..1f981942d7f9 100644
>> --- a/drivers/usb/dwc3/gadget.c
>> +++ b/drivers/usb/dwc3/gadget.c
>> @@ -1516,7 +1516,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>>  {
>>  	struct dwc3		*dwc = dep->dwc;
>>  
>> -	if (!dep->endpoint.desc) {
>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>>  				dep->name);
>>  		return -ESHUTDOWN;
>> @@ -1926,6 +1926,24 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>>  	return 0;
>>  }
>>  
>> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
>> +{
>> +	u32 epnum;
>> +
>> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
>> +		struct dwc3_ep *dep;
>> +
>> +		dep = dwc->eps[epnum];
>> +		if (!dep)
>> +			continue;
>> +
>> +		if (!(dep->flags & DWC3_EP_ENABLED))
>> +			continue;
>> +
>> +		dwc3_remove_requests(dwc, dep);
>> +	}
>> +}
>> +
>>  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>  {
>>  	u32			reg;
>> @@ -1950,16 +1968,37 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>  
>>  		dwc->pullups_connected = true;
>>  	} else {
>> +		dwc->pullups_connected = false;
>> +
>> +		__dwc3_gadget_ep_disable(dwc->eps[0]);
>> +		__dwc3_gadget_ep_disable(dwc->eps[1]);
> 
> run_stop() function shouldn't be doing this. This is done in
> dwc3_gadget_stop(). Also, if it's device-initiated disconnect, driver
> needs to wait for control transfers to complete.
> 

Hi Thinh ,

Thanks for the feedback.

We already wait for the ep0state to move to the setup stage before
running the run stop routine, but events can still be triggered until
the controller is halted. (which is not started until we attempt to
write to the DCTL register) The reasoning will be the same as the below
comment.

>> +
>> +		/*
>> +		 * The databook explicitly mentions for a device-initiated
>> +		 * disconnect sequence, the SW needs to ensure that it ends any
>> +		 * active transfers.
>> +		 */
>> +		dwc3_stop_active_transfers(dwc);
> 
> It shouldn't be done here. Maybe move this to the dwc3_gadget_pullup()
> function. The run_stop() function can be called for other context beside
> this (e.g. hibernation).
> 

It was preferred to have it placed after the pullups_connected was set
to false, so that further ep queues would be blocked (with the added
check), and we can ensure after the stop active xfers was run, nothing
could be pending.

Also, for the hibernation case, the databook mentions that we should
issue the end transfer routine as well for the hibernation w/ device
disconnected situation. (I don't believe the current DWC3 gadget driver
supports the hibernation while device connected case, which has some
considerations we would need to address)

>> +
>>  		reg &= ~DWC3_DCTL_RUN_STOP;
>>  
>>  		if (dwc->has_hibernation && !suspend)
>>  			reg &= ~DWC3_DCTL_KEEP_CONNECT;
>> -
>> -		dwc->pullups_connected = false;
>>  	}
>>  
>>  	dwc3_gadget_dctl_write_safe(dwc, reg);
>>  
>> +	/* Controller is not halted until pending events are acknowledged */
>> +	if (!is_on) {
>> +		reg = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
>> +		reg &= DWC3_GEVNTCOUNT_MASK;
>> +		if (reg > 0) {
>> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), reg);
>> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + reg) %
>> +						dwc->ev_buf->length;
>> +		}
>> +	}
>> +
> 
> Driver should handle the events before clearing the run_stop bit, not
> just acknowledging and ignoring them.
> 

Do you think its better if we call the dwc3_check_event_buf() and
dwc3_process_event_buf() here?  That will handle the clearing of the
events and allow them to be handled.  There are some snippets in the
databook, which mentions that we don't need to handle/process the
events, and just acknowledge them. (it mentions this in the hibernation
section)

>>  	do {
>>  		reg = dwc3_readl(dwc->regs, DWC3_DSTS);
>>  		reg &= DWC3_DSTS_DEVCTRLHLT;
>> @@ -1994,9 +2033,15 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>  		}
>>  	}
>>  
>> +	/*
>> +	 * Synchronize and disable any further event handling while controller
>> +	 * is being enabled/disabled.
>> +	 */
>> +	disable_irq(dwc->irq_gadget);
>>  	spin_lock_irqsave(&dwc->lock, flags);
>>  	ret = dwc3_gadget_run_stop(dwc, is_on, false);
>>  	spin_unlock_irqrestore(&dwc->lock, flags);
>> +	enable_irq(dwc->irq_gadget);
>>  
>>  	return ret;
>>  }
>> @@ -3535,6 +3580,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
>>  	if (!count)
>>  		return IRQ_NONE;
>>  
>> +	/* Controller is halted; ignore new/pending events */
>> +	if (!dwc->pullups_connected) {
>> +		dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
>> +		dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
>> +					dwc->ev_buf->length;
>> +		return IRQ_HANDLED;
>> +	}
>> +
> 
> Why? Are you getting any event after the controller is halted? Also,
> make sure to take account of hibernation. The controller can get PMU
> event after halted to bring it out of hibernation.
> 

We aren't getting any events after the controller is halted, but before
we go and try to clear the run stop bit.  I added a print in the run
stop API after clearing the RS bit, and there were pending events in the
controller at that time.  It might be redundant to have this if we are
disabling the IRQ already before the run stop call.

I see, so maybe to ensure we don't block the PMU event, we can remove
this, and rely on the disable_irq() and the run stop API to ensure no
events will be pending.

>>  	evt->count = count;
>>  	evt->flags |= DWC3_EVENT_PENDING;
>>  
> 
> If you're making these fixes, can you also fix handling reset interrupt
> ? It also needs to end all the active transfers.
> 

Sure, I can consider that as well.

Thanks
Wesley

> Thanks,
> Thinh
>
Thinh Nguyen Aug. 20, 2020, 3:01 a.m. UTC | #5
Wesley Cheng wrote:
> On 8/19/2020 2:42 PM, Thinh Nguyen wrote:
>> Hi,
>>
>> Wesley Cheng wrote:
>>> In the DWC3 databook, for a device initiated disconnect, the driver is
>>> required to send dependxfer commands for any pending transfers.
>>> In addition, before the controller can move to the halted state, the SW
>>> needs to acknowledge any pending events.  If the controller is not halted
>>> properly, there is a chance the controller will continue accessing stale or
>>> freed TRBs and buffers.
>>>
>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>>
>>> ---
>>> Verified fix by adding a check for ETIMEDOUT during the run stop call.
>>> Shell script writing to the configfs UDC file to trigger disconnect and
>>> connect.  Batch script to have PC execute data transfers over adb (ie adb
>>> push)  After a few iterations, we'd run into a scenario where the
>>> controller wasn't halted.  With the following change, no failed halts after
>>> many iterations.

Btw, we have some sysfs attributes to do soft-connect/disconnect also
/sys/class/udc/<UDC>/soft_connect


>>> ---
>>>  drivers/usb/dwc3/ep0.c    |  2 +-
>>>  drivers/usb/dwc3/gadget.c | 59 +++++++++++++++++++++++++++++++++++++--
>>>  2 files changed, 57 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>>> index 59f2e8c31bd1..456aa87e8778 100644
>>> --- a/drivers/usb/dwc3/ep0.c
>>> +++ b/drivers/usb/dwc3/ep0.c
>>> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>>>  	int				ret;
>>>  
>>>  	spin_lock_irqsave(&dwc->lock, flags);
>>> -	if (!dep->endpoint.desc) {
>>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>>>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>>>  				dep->name);
>>>  		ret = -ESHUTDOWN;
>>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>>> index 3ab6f118c508..1f981942d7f9 100644
>>> --- a/drivers/usb/dwc3/gadget.c
>>> +++ b/drivers/usb/dwc3/gadget.c
>>> @@ -1516,7 +1516,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>>>  {
>>>  	struct dwc3		*dwc = dep->dwc;
>>>  
>>> -	if (!dep->endpoint.desc) {
>>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>>>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>>>  				dep->name);
>>>  		return -ESHUTDOWN;
>>> @@ -1926,6 +1926,24 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>>>  	return 0;
>>>  }
>>>  
>>> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
>>> +{
>>> +	u32 epnum;
>>> +
>>> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
>>> +		struct dwc3_ep *dep;
>>> +
>>> +		dep = dwc->eps[epnum];
>>> +		if (!dep)
>>> +			continue;
>>> +
>>> +		if (!(dep->flags & DWC3_EP_ENABLED))
>>> +			continue;
>>> +
>>> +		dwc3_remove_requests(dwc, dep);
>>> +	}
>>> +}
>>> +
>>>  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>>  {
>>>  	u32			reg;
>>> @@ -1950,16 +1968,37 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>>  
>>>  		dwc->pullups_connected = true;
>>>  	} else {
>>> +		dwc->pullups_connected = false;
>>> +
>>> +		__dwc3_gadget_ep_disable(dwc->eps[0]);
>>> +		__dwc3_gadget_ep_disable(dwc->eps[1]);
>> run_stop() function shouldn't be doing this. This is done in
>> dwc3_gadget_stop(). Also, if it's device-initiated disconnect, driver
>> needs to wait for control transfers to complete.
>>
> Hi Thinh ,
>
> Thanks for the feedback.

Thanks for the patch :)

>
> We already wait for the ep0state to move to the setup stage before
> running the run stop routine, but events can still be triggered until
> the controller is halted. (which is not started until we attempt to
> write to the DCTL register) The reasoning will be the same as the below
> comment.
>
>>> +
>>> +		/*
>>> +		 * The databook explicitly mentions for a device-initiated
>>> +		 * disconnect sequence, the SW needs to ensure that it ends any
>>> +		 * active transfers.
>>> +		 */
>>> +		dwc3_stop_active_transfers(dwc);
>> It shouldn't be done here. Maybe move this to the dwc3_gadget_pullup()
>> function. The run_stop() function can be called for other context beside
>> this (e.g. hibernation).
>>
> It was preferred to have it placed after the pullups_connected was set
> to false, so that further ep queues would be blocked (with the added
> check), and we can ensure after the stop active xfers was run, nothing
> could be pending.

It should be fine if you do it inside the spin_lock of the
dwc3_gadget_pullup(). I'm trying to keep the run_stop() function
separate for the context of hibernation. For hibernation, the driver
needs to wait for the end_transfer commands to complete (to save TRBs
state and to resume transfer later). Granted that we can just add a
check "if (!hibernation)" or modify the code later, but I think it's
cleaner this way.

>
> Also, for the hibernation case, the databook mentions that we should
> issue the end transfer routine as well for the hibernation w/ device
> disconnected situation.

That's true, but see my comment above.

>  (I don't believe the current DWC3 gadget driver
> supports the hibernation while device connected case, which has some
> considerations we would need to address)

Right now, the DWC3 driver doesn't handle hibernation at all. I've yet
to push those changes out.

>
>>> +
>>>  		reg &= ~DWC3_DCTL_RUN_STOP;
>>>  
>>>  		if (dwc->has_hibernation && !suspend)
>>>  			reg &= ~DWC3_DCTL_KEEP_CONNECT;
>>> -
>>> -		dwc->pullups_connected = false;
>>>  	}
>>>  
>>>  	dwc3_gadget_dctl_write_safe(dwc, reg);
>>>  
>>> +	/* Controller is not halted until pending events are acknowledged */
>>> +	if (!is_on) {
>>> +		reg = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
>>> +		reg &= DWC3_GEVNTCOUNT_MASK;
>>> +		if (reg > 0) {
>>> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), reg);
>>> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + reg) %
>>> +						dwc->ev_buf->length;
>>> +		}
>>> +	}
>>> +
>> Driver should handle the events before clearing the run_stop bit, not
>> just acknowledging and ignoring them.
>>
> Do you think its better if we call the dwc3_check_event_buf() and
> dwc3_process_event_buf() here?  That will handle the clearing of the
> events and allow them to be handled.  There are some snippets in the
> databook, which mentions that we don't need to handle/process the
> events, and just acknowledge them. (it mentions this in the hibernation
> section)

I think this should be handle separately from the run_stop() function.
Maybe put it in the pullup() function just for device initiated
disconnect case only.

>
>>>  	do {
>>>  		reg = dwc3_readl(dwc->regs, DWC3_DSTS);
>>>  		reg &= DWC3_DSTS_DEVCTRLHLT;
>>> @@ -1994,9 +2033,15 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>>  		}
>>>  	}
>>>  
>>> +	/*
>>> +	 * Synchronize and disable any further event handling while controller
>>> +	 * is being enabled/disabled.
>>> +	 */
>>> +	disable_irq(dwc->irq_gadget);
>>>  	spin_lock_irqsave(&dwc->lock, flags);
>>>  	ret = dwc3_gadget_run_stop(dwc, is_on, false);
>>>  	spin_unlock_irqrestore(&dwc->lock, flags);
>>> +	enable_irq(dwc->irq_gadget);
>>>  
>>>  	return ret;
>>>  }
>>> @@ -3535,6 +3580,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
>>>  	if (!count)
>>>  		return IRQ_NONE;
>>>  
>>> +	/* Controller is halted; ignore new/pending events */
>>> +	if (!dwc->pullups_connected) {
>>> +		dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
>>> +		dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
>>> +					dwc->ev_buf->length;
>>> +		return IRQ_HANDLED;
>>> +	}
>>> +
>> Why? Are you getting any event after the controller is halted? Also,
>> make sure to take account of hibernation. The controller can get PMU
>> event after halted to bring it out of hibernation.
>>
> We aren't getting any events after the controller is halted, but before
> we go and try to clear the run stop bit.  I added a print in the run
> stop API after clearing the RS bit, and there were pending events in the
> controller at that time.  It might be redundant to have this if we are
> disabling the IRQ already before the run stop call.
>
> I see, so maybe to ensure we don't block the PMU event, we can remove
> this, and rely on the disable_irq() and the run stop API to ensure no
> events will be pending.
>
>>>  	evt->count = count;
>>>  	evt->flags |= DWC3_EVENT_PENDING;
>>>  
>> If you're making these fixes, can you also fix handling reset interrupt
>> ? It also needs to end all the active transfers.
>>
> Sure, I can consider that as well.
>

Thanks,
Thinh
diff mbox series

Patch

diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 59f2e8c31bd1..456aa87e8778 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -197,7 +197,7 @@  int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
 	int				ret;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	if (!dep->endpoint.desc) {
+	if (!dep->endpoint.desc || !dwc->pullups_connected) {
 		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
 				dep->name);
 		ret = -ESHUTDOWN;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3ab6f118c508..1f981942d7f9 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1516,7 +1516,7 @@  static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 {
 	struct dwc3		*dwc = dep->dwc;
 
-	if (!dep->endpoint.desc) {
+	if (!dep->endpoint.desc || !dwc->pullups_connected) {
 		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
 				dep->name);
 		return -ESHUTDOWN;
@@ -1926,6 +1926,24 @@  static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
 	return 0;
 }
 
+static void dwc3_stop_active_transfers(struct dwc3 *dwc)
+{
+	u32 epnum;
+
+	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
+		struct dwc3_ep *dep;
+
+		dep = dwc->eps[epnum];
+		if (!dep)
+			continue;
+
+		if (!(dep->flags & DWC3_EP_ENABLED))
+			continue;
+
+		dwc3_remove_requests(dwc, dep);
+	}
+}
+
 static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
 {
 	u32			reg;
@@ -1950,16 +1968,37 @@  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
 
 		dwc->pullups_connected = true;
 	} else {
+		dwc->pullups_connected = false;
+
+		__dwc3_gadget_ep_disable(dwc->eps[0]);
+		__dwc3_gadget_ep_disable(dwc->eps[1]);
+
+		/*
+		 * The databook explicitly mentions for a device-initiated
+		 * disconnect sequence, the SW needs to ensure that it ends any
+		 * active transfers.
+		 */
+		dwc3_stop_active_transfers(dwc);
+
 		reg &= ~DWC3_DCTL_RUN_STOP;
 
 		if (dwc->has_hibernation && !suspend)
 			reg &= ~DWC3_DCTL_KEEP_CONNECT;
-
-		dwc->pullups_connected = false;
 	}
 
 	dwc3_gadget_dctl_write_safe(dwc, reg);
 
+	/* Controller is not halted until pending events are acknowledged */
+	if (!is_on) {
+		reg = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
+		reg &= DWC3_GEVNTCOUNT_MASK;
+		if (reg > 0) {
+			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), reg);
+			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + reg) %
+						dwc->ev_buf->length;
+		}
+	}
+
 	do {
 		reg = dwc3_readl(dwc->regs, DWC3_DSTS);
 		reg &= DWC3_DSTS_DEVCTRLHLT;
@@ -1994,9 +2033,15 @@  static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 		}
 	}
 
+	/*
+	 * Synchronize and disable any further event handling while controller
+	 * is being enabled/disabled.
+	 */
+	disable_irq(dwc->irq_gadget);
 	spin_lock_irqsave(&dwc->lock, flags);
 	ret = dwc3_gadget_run_stop(dwc, is_on, false);
 	spin_unlock_irqrestore(&dwc->lock, flags);
+	enable_irq(dwc->irq_gadget);
 
 	return ret;
 }
@@ -3535,6 +3580,14 @@  static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
 	if (!count)
 		return IRQ_NONE;
 
+	/* Controller is halted; ignore new/pending events */
+	if (!dwc->pullups_connected) {
+		dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
+		dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
+					dwc->ev_buf->length;
+		return IRQ_HANDLED;
+	}
+
 	evt->count = count;
 	evt->flags |= DWC3_EVENT_PENDING;