
[v4] remoteproc: Use unbounded workqueue for recovery work

Message ID 1650367554-15510-1-git-send-email-quic_mojha@quicinc.com (mailing list archive)
State Accepted
Series [v4] remoteproc: Use unbounded workqueue for recovery work

Commit Message

Mukesh Ojha April 19, 2022, 11:25 a.m. UTC
There can be a scenario where a core is under heavy load (with a
number of tasks affined to it), or where multiple rproc subsystems
go into recovery at the same time and queue their recovery work on
the same core. Even though the subsystems are independent, the
recovery of one is delayed whenever another subsystem's recovery
work takes longer to complete.

If we make this workqueue unbounded, the recovery work can be
picked up by any CPU. This patch addresses that.

Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
Changes in v4:
  - Removed the fallback option per Saravana's comment.
  - Fail the init on workqueue creation failure.
  - Modified commit text.

Changes in v3:
  - Added a fallback option to go back to the earlier path in case
    recovery wq creation fails.

Changes in v2:
  - Removed WQ_HIGHPRI.
  - Updated commit text.


 drivers/remoteproc/remoteproc_core.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
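
For context (editorial note, not part of the patch): work queued on a
bound workqueue such as system_freezable_wq is normally executed by the
per-CPU worker pool of the CPU that queued it, whereas a workqueue
allocated with WQ_UNBOUND lets the scheduler place the worker on any
CPU. The minimal module sketch below illustrates the allocation and
queueing calls the patch relies on; the demo_* names are hypothetical.

/*
 * Minimal sketch (hypothetical demo module, not part of the patch):
 * allocate an unbound, freezable workqueue and queue work on it, so
 * the work is not tied to the CPU that queued it but is still frozen
 * across suspend.
 */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_recovery_wq;

static void demo_recovery_fn(struct work_struct *work)
{
	/* raw_ variant: the unbound worker may be preempted/migrated */
	pr_info("demo recovery running on CPU %d\n", raw_smp_processor_id());
}

static DECLARE_WORK(demo_recovery_work, demo_recovery_fn);

static int __init demo_init(void)
{
	/* WQ_UNBOUND: runnable on any CPU; WQ_FREEZABLE: frozen in suspend */
	demo_recovery_wq = alloc_workqueue("demo_recovery_wq",
					   WQ_UNBOUND | WQ_FREEZABLE, 0);
	if (!demo_recovery_wq)
		return -ENOMEM;

	queue_work(demo_recovery_wq, &demo_recovery_work);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_recovery_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");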

Comments

Mukesh Ojha April 27, 2022, 8:38 a.m. UTC | #1
I missed adding Cc: saravanak, sorry for that.

The comment given on v3 is addressed here:

https://lore.kernel.org/lkml/20220415004417.1790161-1-saravanak@google.com/

Regards,
-Mukesh

On 4/19/2022 4:55 PM, Mukesh Ojha wrote:
> [...]
Mukesh Ojha July 1, 2022, 2:56 p.m. UTC | #2
Hi Bjorn/Mathieu,

Could you please review this?

-Mukesh

On 4/19/2022 4:55 PM, Mukesh Ojha wrote:
> [...]
Bjorn Andersson July 18, 2022, 10:59 p.m. UTC | #3
On Tue, 19 Apr 2022 16:55:54 +0530, Mukesh Ojha wrote:
> There can be a scenario where a core is under heavy load (with a
> number of tasks affined to it), or where multiple rproc subsystems
> go into recovery at the same time and queue their recovery work on
> the same core. Even though the subsystems are independent, the
> recovery of one is delayed whenever another subsystem's recovery
> work takes longer to complete.
> 
> [...]

Applied, thanks!

[1/1] remoteproc: Use unbounded workqueue for recovery work
      commit: 0a7f201e4284b43a60ee286c0921002c7e42413d

Best regards,

Patch

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index c510125..c8a1e3e 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -59,6 +59,7 @@  static int rproc_release_carveout(struct rproc *rproc,
 
 /* Unique indices for remoteproc devices */
 static DEFINE_IDA(rproc_dev_index);
+static struct workqueue_struct *rproc_recovery_wq;
 
 static const char * const rproc_crash_names[] = {
 	[RPROC_MMUFAULT]	= "mmufault",
@@ -2755,8 +2756,7 @@  void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
 	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
 		rproc->name, rproc_crash_to_string(type));
 
-	/* Have a worker handle the error; ensure system is not suspended */
-	queue_work(system_freezable_wq, &rproc->crash_handler);
+	queue_work(rproc_recovery_wq, &rproc->crash_handler);
 }
 EXPORT_SYMBOL(rproc_report_crash);
 
@@ -2805,6 +2805,13 @@  static void __exit rproc_exit_panic(void)
 
 static int __init remoteproc_init(void)
 {
+	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq",
+						WQ_UNBOUND | WQ_FREEZABLE, 0);
+	if (!rproc_recovery_wq) {
+		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
+		return -ENOMEM;
+	}
+
 	rproc_init_sysfs();
 	rproc_init_debugfs();
 	rproc_init_cdev();
@@ -2818,9 +2825,13 @@  static void __exit remoteproc_exit(void)
 {
 	ida_destroy(&rproc_dev_index);
 
+	if (!rproc_recovery_wq)
+		return;
+
 	rproc_exit_panic();
 	rproc_exit_debugfs();
 	rproc_exit_sysfs();
+	destroy_workqueue(rproc_recovery_wq);
 }
 module_exit(remoteproc_exit);
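
Usage note (editorial, not from this thread): a remoteproc platform
driver typically reaches the path changed above by calling
rproc_report_crash() from its fatal-error interrupt handler; the
crash_handler work queued there now runs on the unbound
rproc_recovery_wq instead of system_freezable_wq. A sketch of such a
caller follows; the IRQ handler name is hypothetical, while
rproc_report_crash() and RPROC_FATAL_ERROR are part of the existing
remoteproc API.

/*
 * Hypothetical caller sketch: a platform driver's fatal-error IRQ
 * handler reports the crash; rproc_report_crash() then queues
 * rproc->crash_handler, which with this patch runs on the unbound
 * rproc_recovery_wq.
 */
#include <linux/interrupt.h>
#include <linux/remoteproc.h>

static irqreturn_t demo_fatal_irq_handler(int irq, void *data)
{
	struct rproc *rproc = data;

	rproc_report_crash(rproc, RPROC_FATAL_ERROR);

	return IRQ_HANDLED;
}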