diff mbox series

[3/9] remoteproc: support self recovery after rproc crash

Message ID 20220111033333.403448-6-peng.fan@oss.nxp.com (mailing list archive)
State Changes Requested
Headers show
Series [V2] remoteproc: imx_rproc: use imx specific hook for find_loaded_rsc_table | expand

Commit Message

Peng Fan (OSS) Jan. 11, 2022, 3:33 a.m. UTC
From: Peng Fan <peng.fan@nxp.com>

Current logic only support main processor to stop/start the remote
processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
remote processor could do self recovery after crash and trigger watchdog
reboot. It does not need main processor to load image, stop/start M4
core.

This patch add a new flag to indicate whether the SoC has self recovery
capability. And introduce two functions: rproc_self_recovery,
rproc_assisted_recovery for the two cases. Assisted recovery is as
before, let main processor to help recovery, while self recovery is
recover itself withou help. To self recovery, we only do detach and
attach.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
---
 drivers/remoteproc/remoteproc_core.c | 66 ++++++++++++++++++++--------
 include/linux/remoteproc.h           |  2 +
 2 files changed, 49 insertions(+), 19 deletions(-)

Comments

Mathieu Poirier Jan. 18, 2022, 6:44 p.m. UTC | #1
On Tue, Jan 11, 2022 at 11:33:27AM +0800, Peng Fan (OSS) wrote:
> From: Peng Fan <peng.fan@nxp.com>
> 
> Current logic only support main processor to stop/start the remote
> processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
> remote processor could do self recovery after crash and trigger watchdog
> reboot. It does not need main processor to load image, stop/start M4
> core.
> 
> This patch add a new flag to indicate whether the SoC has self recovery
> capability. And introduce two functions: rproc_self_recovery,
> rproc_assisted_recovery for the two cases. Assisted recovery is as
> before, let main processor to help recovery, while self recovery is
> recover itself withou help. To self recovery, we only do detach and
> attach.
> 
> Signed-off-by: Peng Fan <peng.fan@nxp.com>
> ---
>  drivers/remoteproc/remoteproc_core.c | 66 ++++++++++++++++++++--------
>  include/linux/remoteproc.h           |  2 +
>  2 files changed, 49 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51acf235e..4bd5544dab8f 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1887,6 +1887,49 @@ static int __rproc_detach(struct rproc *rproc)
>  	return 0;
>  }
>  
> +static int rproc_self_recovery(struct rproc *rproc)
> +{
> +	int ret;
> +
> +	mutex_unlock(&rproc->lock);
> +	ret = rproc_detach(rproc);
> +	mutex_lock(&rproc->lock);
> +	if (ret)
> +		return ret;
> +
> +	if (atomic_inc_return(&rproc->power) > 1)
> +		return 0;
> +	return rproc_attach(rproc);
> +}
> +
> +static int rproc_assisted_recovery(struct rproc *rproc)
> +{
> +	const struct firmware *firmware_p;
> +	struct device *dev = &rproc->dev;
> +	int ret;
> +
> +	ret = rproc_stop(rproc, true);
> +	if (ret)
> +		return ret;
> +
> +	/* generate coredump */
> +	rproc->ops->coredump(rproc);
> +
> +	/* load firmware */
> +	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> +	if (ret < 0) {
> +		dev_err(dev, "request_firmware failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	/* boot the remote processor up again */
> +	ret = rproc_start(rproc, firmware_p);
> +
> +	release_firmware(firmware_p);
> +
> +	return ret;
> +}
> +
>  /**
>   * rproc_trigger_recovery() - recover a remoteproc
>   * @rproc: the remote processor
> @@ -1901,7 +1944,6 @@ static int __rproc_detach(struct rproc *rproc)
>   */
>  int rproc_trigger_recovery(struct rproc *rproc)
>  {
> -	const struct firmware *firmware_p;
>  	struct device *dev = &rproc->dev;
>  	int ret;
>  
> @@ -1915,24 +1957,10 @@ int rproc_trigger_recovery(struct rproc *rproc)
>  
>  	dev_err(dev, "recovering %s\n", rproc->name);
>  
> -	ret = rproc_stop(rproc, true);
> -	if (ret)
> -		goto unlock_mutex;
> -
> -	/* generate coredump */
> -	rproc->ops->coredump(rproc);
> -
> -	/* load firmware */
> -	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> -	if (ret < 0) {
> -		dev_err(dev, "request_firmware failed: %d\n", ret);
> -		goto unlock_mutex;
> -	}
> -
> -	/* boot the remote processor up again */
> -	ret = rproc_start(rproc, firmware_p);
> -
> -	release_firmware(firmware_p);
> +	if (rproc->self_recovery)
> +		ret = rproc_self_recovery(rproc);
> +	else
> +		ret = rproc_assisted_recovery(rproc);

The problem of how to handle crash recoveries for processors that have been
attached to is difficult.  Finding a solution for that should be on a patchset
of its own so that people can provide input.

>  
>  unlock_mutex:
>  	mutex_unlock(&rproc->lock);
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index e0600e1e5c17..b32ef46f8aa4 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -529,6 +529,7 @@ struct rproc_dump_segment {
>   * @elf_machine: firmware ELF machine
>   * @cdev: character device of the rproc
>   * @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release
> + * @self_recovery: flag to indicate if remoteproc support self recovery
>   */
>  struct rproc {
>  	struct list_head node;
> @@ -568,6 +569,7 @@ struct rproc {
>  	u16 elf_machine;
>  	struct cdev cdev;
>  	bool cdev_put_on_release;
> +	bool self_recovery;
>  };
>  
>  /**
> -- 
> 2.25.1
>
Peng Fan Jan. 19, 2022, 2:21 a.m. UTC | #2
> Subject: Re: [PATCH 3/9] remoteproc: support self recovery after rproc crash
> 
> On Tue, Jan 11, 2022 at 11:33:27AM +0800, Peng Fan (OSS) wrote:
> > From: Peng Fan <peng.fan@nxp.com>
> >
> > Current logic only support main processor to stop/start the remote
> > processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
> > remote processor could do self recovery after crash and trigger
> > watchdog reboot. It does not need main processor to load image,
> > stop/start M4 core.
> >
> > This patch add a new flag to indicate whether the SoC has self
> > recovery capability. And introduce two functions: rproc_self_recovery,
> > rproc_assisted_recovery for the two cases. Assisted recovery is as
> > before, let main processor to help recovery, while self recovery is
> > recover itself withou help. To self recovery, we only do detach and
> > attach.
> >
> > Signed-off-by: Peng Fan <peng.fan@nxp.com>
> > ---
> >  drivers/remoteproc/remoteproc_core.c | 66
> ++++++++++++++++++++--------
> >  include/linux/remoteproc.h           |  2 +
> >  2 files changed, 49 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/remoteproc/remoteproc_core.c
> > b/drivers/remoteproc/remoteproc_core.c
> > index 69f51acf235e..4bd5544dab8f 100644
> > --- a/drivers/remoteproc/remoteproc_core.c
> > +++ b/drivers/remoteproc/remoteproc_core.c
> > @@ -1887,6 +1887,49 @@ static int __rproc_detach(struct rproc *rproc)
> >  	return 0;
> >  }
> >
> > +static int rproc_self_recovery(struct rproc *rproc) {
> > +	int ret;
> > +
> > +	mutex_unlock(&rproc->lock);
> > +	ret = rproc_detach(rproc);
> > +	mutex_lock(&rproc->lock);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (atomic_inc_return(&rproc->power) > 1)
> > +		return 0;
> > +	return rproc_attach(rproc);
> > +}
> > +
> > +static int rproc_assisted_recovery(struct rproc *rproc) {
> > +	const struct firmware *firmware_p;
> > +	struct device *dev = &rproc->dev;
> > +	int ret;
> > +
> > +	ret = rproc_stop(rproc, true);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* generate coredump */
> > +	rproc->ops->coredump(rproc);
> > +
> > +	/* load firmware */
> > +	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > +	if (ret < 0) {
> > +		dev_err(dev, "request_firmware failed: %d\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	/* boot the remote processor up again */
> > +	ret = rproc_start(rproc, firmware_p);
> > +
> > +	release_firmware(firmware_p);
> > +
> > +	return ret;
> > +}
> > +
> >  /**
> >   * rproc_trigger_recovery() - recover a remoteproc
> >   * @rproc: the remote processor
> > @@ -1901,7 +1944,6 @@ static int __rproc_detach(struct rproc *rproc)
> >   */
> >  int rproc_trigger_recovery(struct rproc *rproc)  {
> > -	const struct firmware *firmware_p;
> >  	struct device *dev = &rproc->dev;
> >  	int ret;
> >
> > @@ -1915,24 +1957,10 @@ int rproc_trigger_recovery(struct rproc
> > *rproc)
> >
> >  	dev_err(dev, "recovering %s\n", rproc->name);
> >
> > -	ret = rproc_stop(rproc, true);
> > -	if (ret)
> > -		goto unlock_mutex;
> > -
> > -	/* generate coredump */
> > -	rproc->ops->coredump(rproc);
> > -
> > -	/* load firmware */
> > -	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > -	if (ret < 0) {
> > -		dev_err(dev, "request_firmware failed: %d\n", ret);
> > -		goto unlock_mutex;
> > -	}
> > -
> > -	/* boot the remote processor up again */
> > -	ret = rproc_start(rproc, firmware_p);
> > -
> > -	release_firmware(firmware_p);
> > +	if (rproc->self_recovery)
> > +		ret = rproc_self_recovery(rproc);
> > +	else
> > +		ret = rproc_assisted_recovery(rproc);
> 
> The problem of how to handle crash recoveries for processors that have been
> attached to is difficult.  Finding a solution for that should be on a patchset of
> its own so that people can provide input.

ok, will move this patch out from the patchset.

Thanks,
Peng.

> 
> >
> >  unlock_mutex:
> >  	mutex_unlock(&rproc->lock);
> > diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> > index e0600e1e5c17..b32ef46f8aa4 100644
> > --- a/include/linux/remoteproc.h
> > +++ b/include/linux/remoteproc.h
> > @@ -529,6 +529,7 @@ struct rproc_dump_segment {
> >   * @elf_machine: firmware ELF machine
> >   * @cdev: character device of the rproc
> >   * @cdev_put_on_release: flag to indicate if remoteproc should be
> > shutdown on @char_dev release
> > + * @self_recovery: flag to indicate if remoteproc support self
> > + recovery
> >   */
> >  struct rproc {
> >  	struct list_head node;
> > @@ -568,6 +569,7 @@ struct rproc {
> >  	u16 elf_machine;
> >  	struct cdev cdev;
> >  	bool cdev_put_on_release;
> > +	bool self_recovery;
> >  };
> >
> >  /**
> > --
> > 2.25.1
> >
diff mbox series

Patch

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 69f51acf235e..4bd5544dab8f 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1887,6 +1887,49 @@  static int __rproc_detach(struct rproc *rproc)
 	return 0;
 }
 
+static int rproc_self_recovery(struct rproc *rproc)
+{
+	int ret;
+
+	mutex_unlock(&rproc->lock);
+	ret = rproc_detach(rproc);
+	mutex_lock(&rproc->lock);
+	if (ret)
+		return ret;
+
+	if (atomic_inc_return(&rproc->power) > 1)
+		return 0;
+	return rproc_attach(rproc);
+}
+
+static int rproc_assisted_recovery(struct rproc *rproc)
+{
+	const struct firmware *firmware_p;
+	struct device *dev = &rproc->dev;
+	int ret;
+
+	ret = rproc_stop(rproc, true);
+	if (ret)
+		return ret;
+
+	/* generate coredump */
+	rproc->ops->coredump(rproc);
+
+	/* load firmware */
+	ret = request_firmware(&firmware_p, rproc->firmware, dev);
+	if (ret < 0) {
+		dev_err(dev, "request_firmware failed: %d\n", ret);
+		return ret;
+	}
+
+	/* boot the remote processor up again */
+	ret = rproc_start(rproc, firmware_p);
+
+	release_firmware(firmware_p);
+
+	return ret;
+}
+
 /**
  * rproc_trigger_recovery() - recover a remoteproc
  * @rproc: the remote processor
@@ -1901,7 +1944,6 @@  static int __rproc_detach(struct rproc *rproc)
  */
 int rproc_trigger_recovery(struct rproc *rproc)
 {
-	const struct firmware *firmware_p;
 	struct device *dev = &rproc->dev;
 	int ret;
 
@@ -1915,24 +1957,10 @@  int rproc_trigger_recovery(struct rproc *rproc)
 
 	dev_err(dev, "recovering %s\n", rproc->name);
 
-	ret = rproc_stop(rproc, true);
-	if (ret)
-		goto unlock_mutex;
-
-	/* generate coredump */
-	rproc->ops->coredump(rproc);
-
-	/* load firmware */
-	ret = request_firmware(&firmware_p, rproc->firmware, dev);
-	if (ret < 0) {
-		dev_err(dev, "request_firmware failed: %d\n", ret);
-		goto unlock_mutex;
-	}
-
-	/* boot the remote processor up again */
-	ret = rproc_start(rproc, firmware_p);
-
-	release_firmware(firmware_p);
+	if (rproc->self_recovery)
+		ret = rproc_self_recovery(rproc);
+	else
+		ret = rproc_assisted_recovery(rproc);
 
 unlock_mutex:
 	mutex_unlock(&rproc->lock);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index e0600e1e5c17..b32ef46f8aa4 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -529,6 +529,7 @@  struct rproc_dump_segment {
  * @elf_machine: firmware ELF machine
  * @cdev: character device of the rproc
  * @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release
+ * @self_recovery: flag to indicate if remoteproc support self recovery
  */
 struct rproc {
 	struct list_head node;
@@ -568,6 +569,7 @@  struct rproc {
 	u16 elf_machine;
 	struct cdev cdev;
 	bool cdev_put_on_release;
+	bool self_recovery;
 };
 
 /**