diff mbox series

[3/4] ASoC: SOF: amd: Handle IPC replies before FW_BOOT_COMPLETE

Message ID 20250207-sof-vangogh-fixes-v1-3-67824c1e4c9a@collabora.com (mailing list archive)
State Accepted
Commit ac84ca815adb4171a4276b1d44096b75f6a150b7
Headers show
Series Sound fix for Valve Steam Deck OLED on resume from suspend | expand

Commit Message

Cristian Ciocaltea Feb. 7, 2025, 11:46 a.m. UTC
In some cases, e.g. when resuming from suspend, it is possible for the
host to receive IPC reply messages while the DSP firmware has not yet
reached the complete boot state.

Detect when this happens and do not attempt to process the unexpected
replies from DSP.  Instead, provide proper debugging support.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 sound/soc/sof/amd/acp-ipc.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

Comments

Vijendar Mukunda Feb. 7, 2025, 11:55 a.m. UTC | #1
On 07/02/25 17:16, Cristian Ciocaltea wrote:
> In some cases, e.g. during resuming from suspend, there is a possibility
> that some IPC reply messages get received by the host while the DSP
> firmware has not yet reached the complete boot state.
>
> Detect when this happens and do not attempt to process the unexpected
> replies from DSP.  Instead, provide proper debugging support.
As per our understanding, before FW boot completion there won't
be any IPC responses sent from Firmware.
In this case, do we really need such a condition check?
>
> Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
> ---
>  sound/soc/sof/amd/acp-ipc.c | 23 ++++++++++++++++-------
>  1 file changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
> index 5f371d9263f3bad507236ace95b7ef323c369187..12caefd08788595be8de03a863b88b5bbc15847d 100644
> --- a/sound/soc/sof/amd/acp-ipc.c
> +++ b/sound/soc/sof/amd/acp-ipc.c
> @@ -167,6 +167,7 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>  
>  	if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) {
>  		acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status));
> +
>  		if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
>  			snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status),
>  					  true);
> @@ -188,13 +189,21 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>  
>  	dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
>  	if (dsp_ack) {
> -		spin_lock_irq(&sdev->ipc_lock);
> -		/* handle immediate reply from DSP core */
> -		acp_dsp_ipc_get_reply(sdev);
> -		snd_sof_ipc_reply(sdev, 0);
> -		/* set the done bit */
> -		acp_dsp_ipc_dsp_done(sdev);
> -		spin_unlock_irq(&sdev->ipc_lock);
> +		if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) {
> +			spin_lock_irq(&sdev->ipc_lock);
> +
> +			/* handle immediate reply from DSP core */
> +			acp_dsp_ipc_get_reply(sdev);
> +			snd_sof_ipc_reply(sdev, 0);
> +			/* set the done bit */
> +			acp_dsp_ipc_dsp_done(sdev);
> +
> +			spin_unlock_irq(&sdev->ipc_lock);
> +		} else {
> +			dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n",
> +					    dsp_ack);
> +		}
> +
>  		ipc_irq = true;
>  	}
>  
>
Cristian Ciocaltea Feb. 7, 2025, 12:16 p.m. UTC | #2
On 2/7/25 1:55 PM, Mukunda,Vijendar wrote:
> On 07/02/25 17:16, Cristian Ciocaltea wrote:
>> In some cases, e.g. during resuming from suspend, there is a possibility
>> that some IPC reply messages get received by the host while the DSP
>> firmware has not yet reached the complete boot state.
>>
>> Detect when this happens and do not attempt to process the unexpected
>> replies from DSP.  Instead, provide proper debugging support.
> As per our understanding, before FW boot completion there won't
> be any IPC responses sent from Firmware.
> In this case, do we really need such a condition check?

During suspend/resume stress testing I was able to get messages of this
kind, and that's the actual reason for introducing the check.

Also it doesn't seem to be uncommon, e.g. Intel HDA IPC also provides
similar checks.

>>
>> Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
>> ---
>>  sound/soc/sof/amd/acp-ipc.c | 23 ++++++++++++++++-------
>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
>> index 5f371d9263f3bad507236ace95b7ef323c369187..12caefd08788595be8de03a863b88b5bbc15847d 100644
>> --- a/sound/soc/sof/amd/acp-ipc.c
>> +++ b/sound/soc/sof/amd/acp-ipc.c
>> @@ -167,6 +167,7 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>>  
>>  	if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) {
>>  		acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status));
>> +
>>  		if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
>>  			snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status),
>>  					  true);
>> @@ -188,13 +189,21 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>>  
>>  	dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
>>  	if (dsp_ack) {
>> -		spin_lock_irq(&sdev->ipc_lock);
>> -		/* handle immediate reply from DSP core */
>> -		acp_dsp_ipc_get_reply(sdev);
>> -		snd_sof_ipc_reply(sdev, 0);
>> -		/* set the done bit */
>> -		acp_dsp_ipc_dsp_done(sdev);
>> -		spin_unlock_irq(&sdev->ipc_lock);
>> +		if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) {
>> +			spin_lock_irq(&sdev->ipc_lock);
>> +
>> +			/* handle immediate reply from DSP core */
>> +			acp_dsp_ipc_get_reply(sdev);
>> +			snd_sof_ipc_reply(sdev, 0);
>> +			/* set the done bit */
>> +			acp_dsp_ipc_dsp_done(sdev);
>> +
>> +			spin_unlock_irq(&sdev->ipc_lock);
>> +		} else {
>> +			dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n",
>> +					    dsp_ack);
>> +		}
>> +
>>  		ipc_irq = true;
>>  	}
>>  
>>
>
Vijendar Mukunda Feb. 7, 2025, 12:24 p.m. UTC | #3
On 07/02/25 17:46, Cristian Ciocaltea wrote:
> On 2/7/25 1:55 PM, Mukunda,Vijendar wrote:
>> On 07/02/25 17:16, Cristian Ciocaltea wrote:
>>> In some cases, e.g. during resuming from suspend, there is a possibility
>>> that some IPC reply messages get received by the host while the DSP
>>> firmware has not yet reached the complete boot state.
>>>
>>> Detect when this happens and do not attempt to process the unexpected
>>> replies from DSP.  Instead, provide proper debugging support.
>> As per our understanding, before FW boot completion there won't
>> be any IPC responses sent from Firmware.
>> In this case, do we really need such a condition check?
> During the suspend/resume stress testing I was able to get this kind of
> messages, and that's the actual reason for introducing the verification.
>
> Also it doesn't seem to be uncommon, e.g. Intel HDA IPC also provides
> similar checks.
>
Could you please share reference logs so that we can see which IPC messages
are being received before the FW_READY message/FW boot complete?
>>> Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
>>> ---
>>>  sound/soc/sof/amd/acp-ipc.c | 23 ++++++++++++++++-------
>>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
>>> index 5f371d9263f3bad507236ace95b7ef323c369187..12caefd08788595be8de03a863b88b5bbc15847d 100644
>>> --- a/sound/soc/sof/amd/acp-ipc.c
>>> +++ b/sound/soc/sof/amd/acp-ipc.c
>>> @@ -167,6 +167,7 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>>>  
>>>  	if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) {
>>>  		acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status));
>>> +
>>>  		if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
>>>  			snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status),
>>>  					  true);
>>> @@ -188,13 +189,21 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>>>  
>>>  	dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
>>>  	if (dsp_ack) {
>>> -		spin_lock_irq(&sdev->ipc_lock);
>>> -		/* handle immediate reply from DSP core */
>>> -		acp_dsp_ipc_get_reply(sdev);
>>> -		snd_sof_ipc_reply(sdev, 0);
>>> -		/* set the done bit */
>>> -		acp_dsp_ipc_dsp_done(sdev);
>>> -		spin_unlock_irq(&sdev->ipc_lock);
>>> +		if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) {
>>> +			spin_lock_irq(&sdev->ipc_lock);
>>> +
>>> +			/* handle immediate reply from DSP core */
>>> +			acp_dsp_ipc_get_reply(sdev);
>>> +			snd_sof_ipc_reply(sdev, 0);
>>> +			/* set the done bit */
>>> +			acp_dsp_ipc_dsp_done(sdev);
>>> +
>>> +			spin_unlock_irq(&sdev->ipc_lock);
>>> +		} else {
>>> +			dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n",
>>> +					    dsp_ack);
>>> +		}
>>> +
>>>  		ipc_irq = true;
>>>  	}
>>>  
>>>
Cristian Ciocaltea Feb. 7, 2025, 5 p.m. UTC | #4
On 2/7/25 2:24 PM, Mukunda,Vijendar wrote:
> On 07/02/25 17:46, Cristian Ciocaltea wrote:
>> On 2/7/25 1:55 PM, Mukunda,Vijendar wrote:
>>> On 07/02/25 17:16, Cristian Ciocaltea wrote:
>>>> In some cases, e.g. during resuming from suspend, there is a possibility
>>>> that some IPC reply messages get received by the host while the DSP
>>>> firmware has not yet reached the complete boot state.
>>>>
>>>> Detect when this happens and do not attempt to process the unexpected
>>>> replies from DSP.  Instead, provide proper debugging support.
>>> As per our understanding, before FW boot completion there won't
>>> be any IPC responses sent from Firmware.
>>> In this case, do we really need such a condition check?
>> During the suspend/resume stress testing I was able to get this kind of
>> messages, and that's the actual reason for introducing the verification.
>>
>> Also it doesn't seem to be uncommon, e.g. Intel HDA IPC also provides
>> similar checks.
>>
> Could you please share reference logs to know which IPC messages
> are being received before FW_READY message/FW boot complete?

As mentioned in a previous reply, I couldn't enable debugging during
stress testing because it had the annoying effect of hiding the audio
breakage issue.  I will try to experiment with some more targeted
instrumentation and see if it's possible to extract some useful data.

Regardless, I think it's worth keeping this check in place as it's
definitely helpful to spot potential FW issues (for current or future
products).

>>>> Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
>>>> ---
>>>>  sound/soc/sof/amd/acp-ipc.c | 23 ++++++++++++++++-------
>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
>>>> index 5f371d9263f3bad507236ace95b7ef323c369187..12caefd08788595be8de03a863b88b5bbc15847d 100644
>>>> --- a/sound/soc/sof/amd/acp-ipc.c
>>>> +++ b/sound/soc/sof/amd/acp-ipc.c
>>>> @@ -167,6 +167,7 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>>>>  
>>>>  	if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) {
>>>>  		acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status));
>>>> +
>>>>  		if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
>>>>  			snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status),
>>>>  					  true);
>>>> @@ -188,13 +189,21 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
>>>>  
>>>>  	dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
>>>>  	if (dsp_ack) {
>>>> -		spin_lock_irq(&sdev->ipc_lock);
>>>> -		/* handle immediate reply from DSP core */
>>>> -		acp_dsp_ipc_get_reply(sdev);
>>>> -		snd_sof_ipc_reply(sdev, 0);
>>>> -		/* set the done bit */
>>>> -		acp_dsp_ipc_dsp_done(sdev);
>>>> -		spin_unlock_irq(&sdev->ipc_lock);
>>>> +		if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) {
>>>> +			spin_lock_irq(&sdev->ipc_lock);
>>>> +
>>>> +			/* handle immediate reply from DSP core */
>>>> +			acp_dsp_ipc_get_reply(sdev);
>>>> +			snd_sof_ipc_reply(sdev, 0);
>>>> +			/* set the done bit */
>>>> +			acp_dsp_ipc_dsp_done(sdev);
>>>> +
>>>> +			spin_unlock_irq(&sdev->ipc_lock);
>>>> +		} else {
>>>> +			dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n",
>>>> +					    dsp_ack);
>>>> +		}
>>>> +
>>>>  		ipc_irq = true;
>>>>  	}
>>>>  
>>>>
>
diff mbox series

Patch

diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
index 5f371d9263f3bad507236ace95b7ef323c369187..12caefd08788595be8de03a863b88b5bbc15847d 100644
--- a/sound/soc/sof/amd/acp-ipc.c
+++ b/sound/soc/sof/amd/acp-ipc.c
@@ -167,6 +167,7 @@  irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
 
 	if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) {
 		acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status));
+
 		if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
 			snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status),
 					  true);
@@ -188,13 +189,21 @@  irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
 
 	dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
 	if (dsp_ack) {
-		spin_lock_irq(&sdev->ipc_lock);
-		/* handle immediate reply from DSP core */
-		acp_dsp_ipc_get_reply(sdev);
-		snd_sof_ipc_reply(sdev, 0);
-		/* set the done bit */
-		acp_dsp_ipc_dsp_done(sdev);
-		spin_unlock_irq(&sdev->ipc_lock);
+		if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) {
+			spin_lock_irq(&sdev->ipc_lock);
+
+			/* handle immediate reply from DSP core */
+			acp_dsp_ipc_get_reply(sdev);
+			snd_sof_ipc_reply(sdev, 0);
+			/* set the done bit */
+			acp_dsp_ipc_dsp_done(sdev);
+
+			spin_unlock_irq(&sdev->ipc_lock);
+		} else {
+			dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n",
+					    dsp_ack);
+		}
+
 		ipc_irq = true;
 	}