diff mbox series

[3/7] ASoC: SOF: core: release resources on errors in probe_continue

Message ID 20200124213625.30186-4-pierre-louis.bossart@linux.intel.com (mailing list archive)
State Accepted
Commit 410e5e55c9c1c9c0d452ac5b9adb37b933a7747e
Headers show
Series ASoC: SOF: fixes for 5.6 | expand

Commit Message

Pierre-Louis Bossart Jan. 24, 2020, 9:36 p.m. UTC
The initial intent of releasing resources in the .remove does not work
well with HDaudio codecs. If the probe_continue() fails in a work
queue, e.g. due to missing firmware or authentication issues, we don't
release any resources, and as a result the kernel oopses during
suspend operations.

The suggested fix is to release all resources during errors in
probe_continue(), and use fw_state to track resource allocation
state, so that .remove does not attempt to release the same
hardware resources twice. PM operations are also modified so that
no action is done if DSP resources have been freed due to
an error at probe.

Reported-by: Takashi Iwai <tiwai@suse.de>
Co-developed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Bugzilla:  http://bugzilla.suse.com/show_bug.cgi?id=1161246
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
---
 sound/soc/sof/core.c | 33 ++++++++++++---------------------
 sound/soc/sof/pm.c   |  4 ++++
 2 files changed, 16 insertions(+), 21 deletions(-)

Comments

Takashi Iwai Jan. 25, 2020, 10:39 a.m. UTC | #1
On Fri, 24 Jan 2020 22:36:21 +0100,
Pierre-Louis Bossart wrote:
> 
> The initial intent of releasing resources in the .remove does not work
> well with HDaudio codecs. If the probe_continue() fails in a work
> queue, e.g. due to missing firmware or authentication issues, we don't
> release any resources, and as a result the kernel oopses during
> suspend operations.
> 
> The suggested fix is to release all resources during errors in
> probe_continue(), and use fw_state to track resource allocation
> state, so that .remove does not attempt to release the same
> hardware resources twice. PM operations are also modified so that
> no action is done if DSP resources have been freed due to
> an error at probe.
> 
> Reported-by: Takashi Iwai <tiwai@suse.de>
> Co-developed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
> Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
> Bugzilla:  http://bugzilla.suse.com/show_bug.cgi?id=1161246
> Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>

This deserves for Cc to stable, as the bug already hits on both 5.4
and 5.5 kernels.

Reviewed-by: Takashi Iwai <tiwai@suse.de>


thanks,

Takashi

> ---
>  sound/soc/sof/core.c | 33 ++++++++++++---------------------
>  sound/soc/sof/pm.c   |  4 ++++
>  2 files changed, 16 insertions(+), 21 deletions(-)
> 
> diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
> index f517ab448a1d..34cefbaf2d2a 100644
> --- a/sound/soc/sof/core.c
> +++ b/sound/soc/sof/core.c
> @@ -244,7 +244,6 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
>  
>  	return 0;
>  
> -#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)
>  fw_trace_err:
>  	snd_sof_free_trace(sdev);
>  fw_run_err:
> @@ -255,22 +254,10 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
>  	snd_sof_free_debug(sdev);
>  dbg_err:
>  	snd_sof_remove(sdev);
> -#else
>  
> -	/*
> -	 * when the probe_continue is handled in a work queue, the
> -	 * probe does not fail so we don't release resources here.
> -	 * They will be released with an explicit call to
> -	 * snd_sof_device_remove() when the PCI/ACPI device is removed
> -	 */
> -
> -fw_trace_err:
> -fw_run_err:
> -fw_load_err:
> -ipc_err:
> -dbg_err:
> -
> -#endif
> +	/* all resources freed, update state to match */
> +	sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
> +	sdev->first_boot = true;
>  
>  	return ret;
>  }
> @@ -353,10 +340,12 @@ int snd_sof_device_remove(struct device *dev)
>  	if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
>  		cancel_work_sync(&sdev->probe_work);
>  
> -	snd_sof_fw_unload(sdev);
> -	snd_sof_ipc_free(sdev);
> -	snd_sof_free_debug(sdev);
> -	snd_sof_free_trace(sdev);
> +	if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) {
> +		snd_sof_fw_unload(sdev);
> +		snd_sof_ipc_free(sdev);
> +		snd_sof_free_debug(sdev);
> +		snd_sof_free_trace(sdev);
> +	}
>  
>  	/*
>  	 * Unregister machine driver. This will unbind the snd_card which
> @@ -364,13 +353,15 @@ int snd_sof_device_remove(struct device *dev)
>  	 * before freeing the snd_card.
>  	 */
>  	snd_sof_machine_unregister(sdev, pdata);
> +
>  	/*
>  	 * Unregistering the machine driver results in unloading the topology.
>  	 * Some widgets, ex: scheduler, attempt to power down the core they are
>  	 * scheduled on, when they are unloaded. Therefore, the DSP must be
>  	 * removed only after the topology has been unloaded.
>  	 */
> -	snd_sof_remove(sdev);
> +	if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED)
> +		snd_sof_remove(sdev);
>  
>  	/* release firmware */
>  	release_firmware(pdata->fw);
> diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
> index 84290bbeebdd..a0cde053b61a 100644
> --- a/sound/soc/sof/pm.c
> +++ b/sound/soc/sof/pm.c
> @@ -56,6 +56,10 @@ static int sof_resume(struct device *dev, bool runtime_resume)
>  	if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume)
>  		return 0;
>  
> +	/* DSP was never successfully started, nothing to resume */
> +	if (sdev->first_boot)
> +		return 0;
> +
>  	/*
>  	 * if the runtime_resume flag is set, call the runtime_resume routine
>  	 * or else call the system resume routine
> -- 
> 2.20.1
>
Pierre-Louis Bossart Jan. 27, 2020, 3:08 p.m. UTC | #2
On 1/25/20 4:39 AM, Takashi Iwai wrote:
> On Fri, 24 Jan 2020 22:36:21 +0100,
> Pierre-Louis Bossart wrote:
>>
>> The initial intent of releasing resources in the .remove does not work
>> well with HDaudio codecs. If the probe_continue() fails in a work
>> queue, e.g. due to missing firmware or authentication issues, we don't
>> release any resources, and as a result the kernel oopses during
>> suspend operations.
>>
>> The suggested fix is to release all resources during errors in
>> probe_continue(), and use fw_state to track resource allocation
>> state, so that .remove does not attempt to release the same
>> hardware resources twice. PM operations are also modified so that
>> no action is done if DSP resources have been freed due to
>> an error at probe.
>>
>> Reported-by: Takashi Iwai <tiwai@suse.de>
>> Co-developed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
>> Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
>> Bugzilla:  http://bugzilla.suse.com/show_bug.cgi?id=1161246
>> Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
> 
> This deserves for Cc to stable, as the bug already hits on both 5.4
> and 5.5 kernels.
> 
> Reviewed-by: Takashi Iwai <tiwai@suse.de>

Patch 2 would need to be Cc:'ed to stable as well, otherwise this patch3 
will not apply.
diff mbox series

Patch

diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index f517ab448a1d..34cefbaf2d2a 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -244,7 +244,6 @@  static int sof_probe_continue(struct snd_sof_dev *sdev)
 
 	return 0;
 
-#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)
 fw_trace_err:
 	snd_sof_free_trace(sdev);
 fw_run_err:
@@ -255,22 +254,10 @@  static int sof_probe_continue(struct snd_sof_dev *sdev)
 	snd_sof_free_debug(sdev);
 dbg_err:
 	snd_sof_remove(sdev);
-#else
 
-	/*
-	 * when the probe_continue is handled in a work queue, the
-	 * probe does not fail so we don't release resources here.
-	 * They will be released with an explicit call to
-	 * snd_sof_device_remove() when the PCI/ACPI device is removed
-	 */
-
-fw_trace_err:
-fw_run_err:
-fw_load_err:
-ipc_err:
-dbg_err:
-
-#endif
+	/* all resources freed, update state to match */
+	sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+	sdev->first_boot = true;
 
 	return ret;
 }
@@ -353,10 +340,12 @@  int snd_sof_device_remove(struct device *dev)
 	if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
 		cancel_work_sync(&sdev->probe_work);
 
-	snd_sof_fw_unload(sdev);
-	snd_sof_ipc_free(sdev);
-	snd_sof_free_debug(sdev);
-	snd_sof_free_trace(sdev);
+	if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) {
+		snd_sof_fw_unload(sdev);
+		snd_sof_ipc_free(sdev);
+		snd_sof_free_debug(sdev);
+		snd_sof_free_trace(sdev);
+	}
 
 	/*
 	 * Unregister machine driver. This will unbind the snd_card which
@@ -364,13 +353,15 @@  int snd_sof_device_remove(struct device *dev)
 	 * before freeing the snd_card.
 	 */
 	snd_sof_machine_unregister(sdev, pdata);
+
 	/*
 	 * Unregistering the machine driver results in unloading the topology.
 	 * Some widgets, ex: scheduler, attempt to power down the core they are
 	 * scheduled on, when they are unloaded. Therefore, the DSP must be
 	 * removed only after the topology has been unloaded.
 	 */
-	snd_sof_remove(sdev);
+	if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED)
+		snd_sof_remove(sdev);
 
 	/* release firmware */
 	release_firmware(pdata->fw);
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index 84290bbeebdd..a0cde053b61a 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -56,6 +56,10 @@  static int sof_resume(struct device *dev, bool runtime_resume)
 	if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume)
 		return 0;
 
+	/* DSP was never successfully started, nothing to resume */
+	if (sdev->first_boot)
+		return 0;
+
 	/*
 	 * if the runtime_resume flag is set, call the runtime_resume routine
 	 * or else call the system resume routine