diff mbox series

[v2,4/5] soundwire: cadence: Fix lost ATTACHED interrupts when enumerating

Message ID 20220907085259.3602-5-rf@opensource.cirrus.com (mailing list archive)
State Superseded
Headers show
Series soundwire: Fixes for spurious and missing UNATTACH | expand

Commit Message

Richard Fitzgerald Sept. 7, 2022, 8:52 a.m. UTC
The correct way to handle interrupts is to clear the bits we
are about to handle _before_ handling them. Thus if the condition
then re-asserts during the handling we won't lose it.

This patch changes cdns_update_slave_status_work() to do this.

The previous code cleared the interrupts after handling them.
The problem with this is that when handling enumeration of devices
the ATTACH statuses can be accidentally cleared and so some or all
of the devices never complete their enumeration.

Thus we can have a situation like this:
- one or more devices are reverting to ID #0

- accumulated status bits indicate some devices attached and some
  on ID #0. (Remember: status bits are sticky until they are handled)

- Because of device on #0 sdw_handle_slave_status() programs the
  device ID and exits without handling the other status, expecting
  to get an ATTACHED from this reprogrammed device.

- The device immediately starts reporting ATTACHED in PINGs, which
  will assert its CDNS_MCP_SLAVE_INTSTAT_ATTACHED bit.

- cdns_update_slave_status_work() clears INTSTAT0/1. If the initial
  status had CDNS_MCP_SLAVE_INTSTAT_ATTACHED bit set it will be
  cleared.

- The ATTACHED change for the device has now been lost.

- cdns_update_slave_status_work() clears CDNS_MCP_INT_SLAVE_MASK so
  if the new ATTACHED state had set it, it will be cleared without
  ever having been handled.

Unless there is some other state change from another device to cause
a new interrupt, the ATTACHED state of the reprogrammed device will
never cause an interrupt so its enumeration will not be completed.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
---
 drivers/soundwire/cadence_master.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

Comments

Pierre-Louis Bossart Sept. 12, 2022, 11:05 a.m. UTC | #1
On 9/7/22 10:52, Richard Fitzgerald wrote:
> The correct way to handle interrupts is to clear the bits we
> are about to handle _before_ handling them. Thus if the condition
> then re-asserts during the handling we won't lose it.
> 
> This patch changes cdns_update_slave_status_work() to do this.
> 
> The previous code cleared the interrupts after handling them.
> The problem with this is that when handling enumeration of devices
> the ATTACH statuses can be accidentally cleared and so some or all
> of the devices never complete their enumeration.
> 
> Thus we can have a situation like this:
> - one or more devices are reverting to ID #0
> 
> - accumulated status bits indicate some devices attached and some
>   on ID #0. (Remember: status bits are sticky until they are handled)
> 
> - Because of device on #0 sdw_handle_slave_status() programs the
>   device ID and exits without handling the other status, expecting
>   to get an ATTACHED from this reprogrammed device.
> 
> - The device immediately starts reporting ATTACHED in PINGs, which
>   will assert its CDNS_MCP_SLAVE_INTSTAT_ATTACHED bit.
> 
> - cdns_update_slave_status_work() clears INTSTAT0/1. If the initial
>   status had CDNS_MCP_SLAVE_INTSTAT_ATTACHED bit set it will be
>   cleared.
> 
> - The ATTACHED change for the device has now been lost.
> 
> - cdns_update_slave_status_work() clears CDNS_MCP_INT_SLAVE_MASK so
>   if the new ATTACHED state had set it, it will be cleared without
>   ever having been handled.
> 
> Unless there is some other state change from another device to cause
> a new interrupt, the ATTACHED state of the reprogrammed device will
> never cause an interrupt so its enumeration will not be completed.
> 
> Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
> ---
>  drivers/soundwire/cadence_master.c | 18 ++++++++++++++----
>  1 file changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
> index 245191d22ccd..3acd7b89c940 100644
> --- a/drivers/soundwire/cadence_master.c
> +++ b/drivers/soundwire/cadence_master.c
> @@ -954,9 +954,22 @@ static void cdns_update_slave_status_work(struct work_struct *work)
>  	u32 device0_status;
>  	int retry_count = 0;
>  
> +	/*
> +	 * Clear main interrupt first so we don't lose any assertions
> +	 * the happen during this function.
> +	 */
> +	cdns_writel(cdns, CDNS_MCP_INTSTAT, CDNS_MCP_INT_SLAVE_MASK);
> +
>  	slave0 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT0);
>  	slave1 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT1);
>  
> +	/*
> +	 * Clear the bits before handling so we don't lose any
> +	 * bits that re-assert.
> +	 */
> +	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
> +	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
> +
>  	/* combine the two status */
>  	slave_intstat = ((u64)slave1 << 32) | slave0;
>  
> @@ -964,8 +977,6 @@ static void cdns_update_slave_status_work(struct work_struct *work)
>  
>  update_status:
>  	cdns_update_slave_status(cdns, slave_intstat);
> -	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
> -	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);

this one is hard to review, if you don't clear the status here, then how
does the retry work if there is a new event?

Put differently, do we need to retry and the 'goto update_status' any more?

>  
>  	/*
>  	 * When there is more than one peripheral per link, it's
> @@ -1001,8 +1012,7 @@ static void cdns_update_slave_status_work(struct work_struct *work)
>  		}
>  	}
>  
> -	/* clear and unmask Slave interrupt now */
> -	cdns_writel(cdns, CDNS_MCP_INTSTAT, CDNS_MCP_INT_SLAVE_MASK);
> +	/* unmask Slave interrupt now */
>  	cdns_updatel(cdns, CDNS_MCP_INTMASK,
>  		     CDNS_MCP_INT_SLAVE_MASK, CDNS_MCP_INT_SLAVE_MASK);
>
Richard Fitzgerald Sept. 12, 2022, 12:36 p.m. UTC | #2
On 12/09/2022 12:05, Pierre-Louis Bossart wrote:
> 
> 
> On 9/7/22 10:52, Richard Fitzgerald wrote:
>> The correct way to handle interrupts is to clear the bits we
>> are about to handle _before_ handling them. Thus if the condition
>> then re-asserts during the handling we won't lose it.
>>
>> This patch changes cdns_update_slave_status_work() to do this.
>>
>> The previous code cleared the interrupts after handling them.
>> The problem with this is that when handling enumeration of devices
>> the ATTACH statuses can be accidentally cleared and so some or all
>> of the devices never complete their enumeration.
>>
>> Thus we can have a situation like this:
>> - one or more devices are reverting to ID #0
>>
>> - accumulated status bits indicate some devices attached and some
>>    on ID #0. (Remember: status bits are sticky until they are handled)
>>
>> - Because of device on #0 sdw_handle_slave_status() programs the
>>    device ID and exits without handling the other status, expecting
>>    to get an ATTACHED from this reprogrammed device.
>>
>> - The device immediately starts reporting ATTACHED in PINGs, which
>>    will assert its CDNS_MCP_SLAVE_INTSTAT_ATTACHED bit.
>>
>> - cdns_update_slave_status_work() clears INTSTAT0/1. If the initial
>>    status had CDNS_MCP_SLAVE_INTSTAT_ATTACHED bit set it will be
>>    cleared.
>>
>> - The ATTACHED change for the device has now been lost.
>>
>> - cdns_update_slave_status_work() clears CDNS_MCP_INT_SLAVE_MASK so
>>    if the new ATTACHED state had set it, it will be cleared without
>>    ever having been handled.
>>
>> Unless there is some other state change from another device to cause
>> a new interrupt, the ATTACHED state of the reprogrammed device will
>> never cause an interrupt so its enumeration will not be completed.
>>
>> Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
>> ---
>>   drivers/soundwire/cadence_master.c | 18 ++++++++++++++----
>>   1 file changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
>> index 245191d22ccd..3acd7b89c940 100644
>> --- a/drivers/soundwire/cadence_master.c
>> +++ b/drivers/soundwire/cadence_master.c
>> @@ -954,9 +954,22 @@ static void cdns_update_slave_status_work(struct work_struct *work)
>>   	u32 device0_status;
>>   	int retry_count = 0;
>>   
>> +	/*
>> +	 * Clear main interrupt first so we don't lose any assertions
>> +	 * the happen during this function.
>> +	 */
>> +	cdns_writel(cdns, CDNS_MCP_INTSTAT, CDNS_MCP_INT_SLAVE_MASK);
>> +
>>   	slave0 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT0);
>>   	slave1 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT1);
>>   
>> +	/*
>> +	 * Clear the bits before handling so we don't lose any
>> +	 * bits that re-assert.
>> +	 */
>> +	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
>> +	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
>> +
>>   	/* combine the two status */
>>   	slave_intstat = ((u64)slave1 << 32) | slave0;
>>   
>> @@ -964,8 +977,6 @@ static void cdns_update_slave_status_work(struct work_struct *work)
>>   
>>   update_status:
>>   	cdns_update_slave_status(cdns, slave_intstat);
>> -	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
>> -	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
> 
> this one is hard to review, if you don't clear the status here, then how
> does the retry work if there is a new event?

The retry loop doesn't work off the interrupt status bits. Precisely
because the #0 ATTACH bit probably doesn't re-assert if the PING status
for #0 doesn't change, the retry checks the most recent PING response
instead.

> 
> Put differently, do we need to retry and the 'goto update_status' any more?
> 

Yes, I believe you do still need it. The Cadence interrupts appear to
assert when there is a change of status. If there are multiple devices
reporting on dev ID #0 then the PING status of #0 will not change until
they have all been reprogrammed, so it will not automatically re-assert.

Anyway, I don't want to mix bugfixes with code improvements. If the loop
_could_ be removed that should be done separately from fixing the
interrupt handling bug.

>>   
>>   	/*
>>   	 * When there is more than one peripheral per link, it's
>> @@ -1001,8 +1012,7 @@ static void cdns_update_slave_status_work(struct work_struct *work)
>>   		}
>>   	}
>>   
>> -	/* clear and unmask Slave interrupt now */
>> -	cdns_writel(cdns, CDNS_MCP_INTSTAT, CDNS_MCP_INT_SLAVE_MASK);
>> +	/* unmask Slave interrupt now */
>>   	cdns_updatel(cdns, CDNS_MCP_INTMASK,
>>   		     CDNS_MCP_INT_SLAVE_MASK, CDNS_MCP_INT_SLAVE_MASK);
>>
diff mbox series

Patch

diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index 245191d22ccd..3acd7b89c940 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -954,9 +954,22 @@  static void cdns_update_slave_status_work(struct work_struct *work)
 	u32 device0_status;
 	int retry_count = 0;
 
+	/*
+	 * Clear main interrupt first so we don't lose any assertions
+	 * the happen during this function.
+	 */
+	cdns_writel(cdns, CDNS_MCP_INTSTAT, CDNS_MCP_INT_SLAVE_MASK);
+
 	slave0 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT0);
 	slave1 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT1);
 
+	/*
+	 * Clear the bits before handling so we don't lose any
+	 * bits that re-assert.
+	 */
+	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
+	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
+
 	/* combine the two status */
 	slave_intstat = ((u64)slave1 << 32) | slave0;
 
@@ -964,8 +977,6 @@  static void cdns_update_slave_status_work(struct work_struct *work)
 
 update_status:
 	cdns_update_slave_status(cdns, slave_intstat);
-	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
-	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
 
 	/*
 	 * When there is more than one peripheral per link, it's
@@ -1001,8 +1012,7 @@  static void cdns_update_slave_status_work(struct work_struct *work)
 		}
 	}
 
-	/* clear and unmask Slave interrupt now */
-	cdns_writel(cdns, CDNS_MCP_INTSTAT, CDNS_MCP_INT_SLAVE_MASK);
+	/* unmask Slave interrupt now */
 	cdns_updatel(cdns, CDNS_MCP_INTMASK,
 		     CDNS_MCP_INT_SLAVE_MASK, CDNS_MCP_INT_SLAVE_MASK);