diff mbox series

[2/3] venus: Rework recovery mechanism

Message ID 20200730114632.6717-3-stanimir.varbanov@linaro.org (mailing list archive)
State Accepted
Commit 43e221e485e5f9707b5c6f15484f7174f82da2e4
Headers show
Series Venus - recovery from firmware crash | expand

Commit Message

Stanimir Varbanov July 30, 2020, 11:46 a.m. UTC
After the power-domain and clock restructuring, recovery for
sdm845 and v4 did not work properly. Fix that by reworking the
recovery function and its sequence.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
---
 drivers/media/platform/qcom/venus/core.c      | 24 ++++++++++---------
 drivers/media/platform/qcom/venus/hfi_venus.c | 11 ---------
 2 files changed, 13 insertions(+), 22 deletions(-)

Comments

Fritz Koenig Aug. 7, 2020, 9:29 p.m. UTC | #1
On Thu, Jul 30, 2020 at 4:47 AM Stanimir Varbanov
<stanimir.varbanov@linaro.org> wrote:
>
> After power domains and clock restructuring the recovery for
> sdm845 and v4 did not work properly. Fix that by reworking the
> recovery function and the sequence.
>
> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
> ---
>  drivers/media/platform/qcom/venus/core.c      | 24 ++++++++++---------
>  drivers/media/platform/qcom/venus/hfi_venus.c | 11 ---------
>  2 files changed, 13 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
> index 203c6538044f..46f6e34d435a 100644
> --- a/drivers/media/platform/qcom/venus/core.c
> +++ b/drivers/media/platform/qcom/venus/core.c
> @@ -6,6 +6,7 @@
>  #include <linux/init.h>
>  #include <linux/interconnect.h>
>  #include <linux/ioctl.h>
> +#include <linux/delay.h>
>  #include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/of_device.h>
> @@ -40,13 +41,7 @@ static void venus_event_notify(struct venus_core *core, u32 event)
>         mutex_unlock(&core->lock);
>
>         disable_irq_nosync(core->irq);
> -
> -       /*
> -        * Delay recovery to ensure venus has completed any pending cache
> -        * operations. Without this sleep, we see device reset when firmware is
> -        * unloaded after a system error.
> -        */
> -       schedule_delayed_work(&core->work, msecs_to_jiffies(100));
> +       schedule_delayed_work(&core->work, msecs_to_jiffies(10));
>  }
>
>  static const struct hfi_core_ops venus_core_ops = {
> @@ -59,23 +54,30 @@ static void venus_sys_error_handler(struct work_struct *work)
>                         container_of(work, struct venus_core, work.work);
>         int ret = 0;
>
> -       dev_warn(core->dev, "system error has occurred, starting recovery!\n");
> -
>         pm_runtime_get_sync(core->dev);
>
>         hfi_core_deinit(core, true);
> -       hfi_destroy(core);
> +
> +       dev_warn(core->dev, "system error has occurred, starting recovery!\n");
> +
>         mutex_lock(&core->lock);
> +
> +       while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
> +               msleep(10);
> +
>         venus_shutdown(core);
>
>         pm_runtime_put_sync(core->dev);
>
> +       while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
> +               usleep_range(1000, 1500);
> +
> +       hfi_destroy(core);
>         ret |= hfi_create(core, &venus_core_ops);
>
>         pm_runtime_get_sync(core->dev);
>
>         ret |= venus_boot(core);
> -
>         ret |= hfi_core_resume(core, true);
>
>         enable_irq(core->irq);
> diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
> index 0d8855014ab3..3392fd177d22 100644
> --- a/drivers/media/platform/qcom/venus/hfi_venus.c
> +++ b/drivers/media/platform/qcom/venus/hfi_venus.c
> @@ -986,13 +986,6 @@ static void venus_process_msg_sys_error(struct venus_hfi_device *hdev,
>
>         venus_set_state(hdev, VENUS_STATE_DEINIT);
>
> -       /*
> -        * Once SYS_ERROR received from HW, it is safe to halt the AXI.
> -        * With SYS_ERROR, Venus FW may have crashed and HW might be
> -        * active and causing unnecessary transactions. Hence it is
> -        * safe to stop all AXI transactions from venus subsystem.
> -        */
> -       venus_halt_axi(hdev);
>         venus_sfr_print(hdev);
>  }
>
> @@ -1009,10 +1002,6 @@ static irqreturn_t venus_isr_thread(struct venus_core *core)
>         res = hdev->core->res;
>         pkt = hdev->pkt_buf;
>
> -       if (hdev->irq_status & WRAPPER_INTR_STATUS_A2HWD_MASK) {
> -               venus_sfr_print(hdev);
> -               hfi_process_watchdog_timeout(core);
> -       }
>
>         while (!venus_iface_msgq_read(hdev, pkt)) {
>                 msg_ret = hfi_process_msg_packet(core, pkt);
> --
> 2.17.1
>
Reviewed-by: Fritz Koenig <frkoenig@chromium.org>
diff mbox series

Patch

diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
index 203c6538044f..46f6e34d435a 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -6,6 +6,7 @@ 
 #include <linux/init.h>
 #include <linux/interconnect.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -40,13 +41,7 @@  static void venus_event_notify(struct venus_core *core, u32 event)
 	mutex_unlock(&core->lock);
 
 	disable_irq_nosync(core->irq);
-
-	/*
-	 * Delay recovery to ensure venus has completed any pending cache
-	 * operations. Without this sleep, we see device reset when firmware is
-	 * unloaded after a system error.
-	 */
-	schedule_delayed_work(&core->work, msecs_to_jiffies(100));
+	schedule_delayed_work(&core->work, msecs_to_jiffies(10));
 }
 
 static const struct hfi_core_ops venus_core_ops = {
@@ -59,23 +54,30 @@  static void venus_sys_error_handler(struct work_struct *work)
 			container_of(work, struct venus_core, work.work);
 	int ret = 0;
 
-	dev_warn(core->dev, "system error has occurred, starting recovery!\n");
-
 	pm_runtime_get_sync(core->dev);
 
 	hfi_core_deinit(core, true);
-	hfi_destroy(core);
+
+	dev_warn(core->dev, "system error has occurred, starting recovery!\n");
+
 	mutex_lock(&core->lock);
+
+	while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
+		msleep(10);
+
 	venus_shutdown(core);
 
 	pm_runtime_put_sync(core->dev);
 
+	while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
+		usleep_range(1000, 1500);
+
+	hfi_destroy(core);
 	ret |= hfi_create(core, &venus_core_ops);
 
 	pm_runtime_get_sync(core->dev);
 
 	ret |= venus_boot(core);
-
 	ret |= hfi_core_resume(core, true);
 
 	enable_irq(core->irq);
diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
index 0d8855014ab3..3392fd177d22 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -986,13 +986,6 @@  static void venus_process_msg_sys_error(struct venus_hfi_device *hdev,
 
 	venus_set_state(hdev, VENUS_STATE_DEINIT);
 
-	/*
-	 * Once SYS_ERROR received from HW, it is safe to halt the AXI.
-	 * With SYS_ERROR, Venus FW may have crashed and HW might be
-	 * active and causing unnecessary transactions. Hence it is
-	 * safe to stop all AXI transactions from venus subsystem.
-	 */
-	venus_halt_axi(hdev);
 	venus_sfr_print(hdev);
 }
 
@@ -1009,10 +1002,6 @@  static irqreturn_t venus_isr_thread(struct venus_core *core)
 	res = hdev->core->res;
 	pkt = hdev->pkt_buf;
 
-	if (hdev->irq_status & WRAPPER_INTR_STATUS_A2HWD_MASK) {
-		venus_sfr_print(hdev);
-		hfi_process_watchdog_timeout(core);
-	}
 
 	while (!venus_iface_msgq_read(hdev, pkt)) {
 		msg_ret = hfi_process_msg_packet(core, pkt);