diff mbox series

remoteproc: k3-r5: Wait for core0 power-up before powering up core1

Message ID 20230906124756.3480579-1-a-nandan@ti.com (mailing list archive)
State New, archived
Headers show
Series remoteproc: k3-r5: Wait for core0 power-up before powering up core1 | expand

Commit Message

Apurva Nandan Sept. 6, 2023, 12:47 p.m. UTC
PSC controller has a limitation that it can only power-up the second core
when the first core is in ON state. Power-state for core0 should be equal
to or higher than core1, else the kernel is seen hanging during rproc
loading.

Make the powering up of cores sequential, by waiting for the current core
to power-up before proceeding to the next core, with a timeout of 2sec.
Add a wait queue event in k3_r5_cluster_rproc_init call, that will wait
for the current core to be released from reset before proceeding with the
next core.

Fixes: 6dedbd1d5443 ("remoteproc: k3-r5: Add a remoteproc driver for R5F subsystem")

Signed-off-by: Apurva Nandan <a-nandan@ti.com>
---

 kpv report: https://gist.githubusercontent.com/apurvanandan1997/feb3b304121c265b7827be43752b7ae8/raw

 drivers/remoteproc/ti_k3_r5_remoteproc.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

Comments

Mathieu Poirier Sept. 11, 2023, 4:45 p.m. UTC | #1
Hi Apurva,

On Wed, Sep 06, 2023 at 06:17:56PM +0530, Apurva Nandan wrote:
> PSC controller has a limitation that it can only power-up the second core
> when the first core is in ON state. Power-state for core0 should be equal
> to or higher than core1, else the kernel is seen hanging during rproc
> loading.
> 
> Make the powering up of cores sequential, by waiting for the current core
> to power-up before proceeding to the next core, with a timeout of 2sec.
> Add a wait queue event in k3_r5_cluster_rproc_init call, that will wait
> for the current core to be released from reset before proceeding with the
> next core.
> 
> Fixes: 6dedbd1d5443 ("remoteproc: k3-r5: Add a remoteproc driver for R5F subsystem")
> 
> Signed-off-by: Apurva Nandan <a-nandan@ti.com>
> ---
> 
>  kpv report: https://gist.githubusercontent.com/apurvanandan1997/feb3b304121c265b7827be43752b7ae8/raw
> 
>  drivers/remoteproc/ti_k3_r5_remoteproc.c | 23 +++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
> index ad3415a3851b..ba5e503f7c9c 100644
> --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
> +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
> @@ -103,12 +103,14 @@ struct k3_r5_soc_data {
>   * @dev: cached device pointer
>   * @mode: Mode to configure the Cluster - Split or LockStep
>   * @cores: list of R5 cores within the cluster
> + * @core_transition: wait queue to sync core state changes
>   * @soc_data: SoC-specific feature data for a R5FSS
>   */
>  struct k3_r5_cluster {
>  	struct device *dev;
>  	enum cluster_mode mode;
>  	struct list_head cores;
> +	wait_queue_head_t core_transition;
>  	const struct k3_r5_soc_data *soc_data;
>  };
>  
> @@ -128,6 +130,7 @@ struct k3_r5_cluster {
>   * @atcm_enable: flag to control ATCM enablement
>   * @btcm_enable: flag to control BTCM enablement
>   * @loczrama: flag to dictate which TCM is at device address 0x0
> + * @released_from_reset: flag to signal when core is out of reset
>   */
>  struct k3_r5_core {
>  	struct list_head elem;
> @@ -144,6 +147,7 @@ struct k3_r5_core {
>  	u32 atcm_enable;
>  	u32 btcm_enable;
>  	u32 loczrama;
> +	bool released_from_reset;
>  };
>  
>  /**
> @@ -460,6 +464,8 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
>  			ret);
>  		return ret;
>  	}
> +	core->released_from_reset = true;
> +	wake_up_interruptible(&cluster->core_transition);
>  
>  	/*
>  	 * Newer IP revisions like on J7200 SoCs support h/w auto-initialization
> @@ -1140,6 +1146,7 @@ static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc)
>  		return ret;
>  	}
>  
> +	core->released_from_reset = c_state;
>  	ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl,
>  				     &stat);
>  	if (ret < 0) {
> @@ -1280,6 +1287,21 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
>  		    cluster->mode == CLUSTER_MODE_SINGLECPU ||
>  		    cluster->mode == CLUSTER_MODE_SINGLECORE)
>  			break;
> +
> +		/* R5 cores require to be powered on sequentially, core0
> +		 * should be in higher power state than core1 in a cluster
> +		 * So, wait for current core to power up before proceeding
> +		 * to next core and put timeout of 2sec for each core.
> +		 */

Wrong multi-line comment format.

> +		ret = wait_event_interruptible_timeout(cluster->core_transition,
> +						       core->released_from_reset,
> +						       msecs_to_jiffies(2000));
> +		if (ret <= 0) {
> +			dev_err(dev,
> +				"Timed out waiting for %s core to power up!\n",
> +				rproc->name);
> +			return ret;
> +		}

From my perspective, this is needed because rproc_auto_boot_callback() for core1
can be called before core0 due to thread execution order.  Am I correct?  

If so please add this explanation to the comment you have above.  Also, let's
say a user decides to switch both cores off after reboot.  At that time, what
prevents a user from switching on core1 before core0 via sysfs?

Thanks,
Mathieu

>  	}
>  
>  	return 0;
> @@ -1709,6 +1731,7 @@ static int k3_r5_probe(struct platform_device *pdev)
>  	cluster->dev = dev;
>  	cluster->soc_data = data;
>  	INIT_LIST_HEAD(&cluster->cores);
> +	init_waitqueue_head(&cluster->core_transition);
>  
>  	ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode);
>  	if (ret < 0 && ret != -EINVAL) {
> -- 
> 2.34.1
>
Apurva Nandan Oct. 4, 2023, 1:51 p.m. UTC | #2
Hi Mathieu,

On 11/09/23 22:15, Mathieu Poirier wrote:
> Hi Apurva,
>
> On Wed, Sep 06, 2023 at 06:17:56PM +0530, Apurva Nandan wrote:
>> PSC controller has a limitation that it can only power-up the second core
>> when the first core is in ON state. Power-state for core0 should be equal
>> to or higher than core1, else the kernel is seen hanging during rproc
>> loading.
>>
>> Make the powering up of cores sequential, by waiting for the current core
>> to power-up before proceeding to the next core, with a timeout of 2sec.
>> Add a wait queue event in k3_r5_cluster_rproc_init call, that will wait
>> for the current core to be released from reset before proceeding with the
>> next core.
>>
>> Fixes: 6dedbd1d5443 ("remoteproc: k3-r5: Add a remoteproc driver for R5F subsystem")
>>
>> Signed-off-by: Apurva Nandan <a-nandan@ti.com>
>> ---
>>
>>   kpv report: https://gist.githubusercontent.com/apurvanandan1997/feb3b304121c265b7827be43752b7ae8/raw
>>
>>   drivers/remoteproc/ti_k3_r5_remoteproc.c | 23 +++++++++++++++++++++++
>>   1 file changed, 23 insertions(+)
>>
>> diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
>> index ad3415a3851b..ba5e503f7c9c 100644
>> --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
>> +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
>> @@ -103,12 +103,14 @@ struct k3_r5_soc_data {
>>    * @dev: cached device pointer
>>    * @mode: Mode to configure the Cluster - Split or LockStep
>>    * @cores: list of R5 cores within the cluster
>> + * @core_transition: wait queue to sync core state changes
>>    * @soc_data: SoC-specific feature data for a R5FSS
>>    */
>>   struct k3_r5_cluster {
>>   	struct device *dev;
>>   	enum cluster_mode mode;
>>   	struct list_head cores;
>> +	wait_queue_head_t core_transition;
>>   	const struct k3_r5_soc_data *soc_data;
>>   };
>>   
>> @@ -128,6 +130,7 @@ struct k3_r5_cluster {
>>    * @atcm_enable: flag to control ATCM enablement
>>    * @btcm_enable: flag to control BTCM enablement
>>    * @loczrama: flag to dictate which TCM is at device address 0x0
>> + * @released_from_reset: flag to signal when core is out of reset
>>    */
>>   struct k3_r5_core {
>>   	struct list_head elem;
>> @@ -144,6 +147,7 @@ struct k3_r5_core {
>>   	u32 atcm_enable;
>>   	u32 btcm_enable;
>>   	u32 loczrama;
>> +	bool released_from_reset;
>>   };
>>   
>>   /**
>> @@ -460,6 +464,8 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
>>   			ret);
>>   		return ret;
>>   	}
>> +	core->released_from_reset = true;
>> +	wake_up_interruptible(&cluster->core_transition);
>>   
>>   	/*
>>   	 * Newer IP revisions like on J7200 SoCs support h/w auto-initialization
>> @@ -1140,6 +1146,7 @@ static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc)
>>   		return ret;
>>   	}
>>   
>> +	core->released_from_reset = c_state;
>>   	ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl,
>>   				     &stat);
>>   	if (ret < 0) {
>> @@ -1280,6 +1287,21 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
>>   		    cluster->mode == CLUSTER_MODE_SINGLECPU ||
>>   		    cluster->mode == CLUSTER_MODE_SINGLECORE)
>>   			break;
>> +
>> +		/* R5 cores require to be powered on sequentially, core0
>> +		 * should be in higher power state than core1 in a cluster
>> +		 * So, wait for current core to power up before proceeding
>> +		 * to next core and put timeout of 2sec for each core.
>> +		 */
> Wrong multi-line comment format.
Okay will fix this.
>> +		ret = wait_event_interruptible_timeout(cluster->core_transition,
>> +						       core->released_from_reset,
>> +						       msecs_to_jiffies(2000));
>> +		if (ret <= 0) {
>> +			dev_err(dev,
>> +				"Timed out waiting for %s core to power up!\n",
>> +				rproc->name);
>> +			return ret;
>> +		}
>  From my perspective, this is needed because rproc_auto_boot_callback() for core1
> can be called before core0 due to thread execution order.  Am I correct?
Yes
> If so please add this explanation to the comment you have above.  Also, let's
> say a user decides to switch both cores off after reboot.  At that time, what
> prevents a user from switching on core1 before core0 via sysfs?
Okay, will add the explanation.
Currently, adding support for graceful shutdown is in progress. As of 
now in order
to stop/start core or change firmware, we recommend users to restart the 
OS.
> Thanks,
> Mathieu
>
>>   	}
>>   
>>   	return 0;
>> @@ -1709,6 +1731,7 @@ static int k3_r5_probe(struct platform_device *pdev)
>>   	cluster->dev = dev;
>>   	cluster->soc_data = data;
>>   	INIT_LIST_HEAD(&cluster->cores);
>> +	init_waitqueue_head(&cluster->core_transition);
>>   
>>   	ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode);
>>   	if (ret < 0 && ret != -EINVAL) {
>> -- 
>> 2.34.1
>>
Mathieu Poirier Oct. 4, 2023, 5:30 p.m. UTC | #3
On Wed, 4 Oct 2023 at 07:51, Apurva Nandan <a-nandan@ti.com> wrote:
>
> Hi Mathieu,
>
> On 11/09/23 22:15, Mathieu Poirier wrote:
> > Hi Apurva,
> >
> > On Wed, Sep 06, 2023 at 06:17:56PM +0530, Apurva Nandan wrote:
> >> PSC controller has a limitation that it can only power-up the second core
> >> when the first core is in ON state. Power-state for core0 should be equal
> >> to or higher than core1, else the kernel is seen hanging during rproc
> >> loading.
> >>
> >> Make the powering up of cores sequential, by waiting for the current core
> >> to power-up before proceeding to the next core, with a timeout of 2sec.
> >> Add a wait queue event in k3_r5_cluster_rproc_init call, that will wait
> >> for the current core to be released from reset before proceeding with the
> >> next core.
> >>
> >> Fixes: 6dedbd1d5443 ("remoteproc: k3-r5: Add a remoteproc driver for R5F subsystem")
> >>
> >> Signed-off-by: Apurva Nandan <a-nandan@ti.com>
> >> ---
> >>
> >>   kpv report: https://gist.githubusercontent.com/apurvanandan1997/feb3b304121c265b7827be43752b7ae8/raw
> >>
> >>   drivers/remoteproc/ti_k3_r5_remoteproc.c | 23 +++++++++++++++++++++++
> >>   1 file changed, 23 insertions(+)
> >>
> >> diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
> >> index ad3415a3851b..ba5e503f7c9c 100644
> >> --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
> >> +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
> >> @@ -103,12 +103,14 @@ struct k3_r5_soc_data {
> >>    * @dev: cached device pointer
> >>    * @mode: Mode to configure the Cluster - Split or LockStep
> >>    * @cores: list of R5 cores within the cluster
> >> + * @core_transition: wait queue to sync core state changes
> >>    * @soc_data: SoC-specific feature data for a R5FSS
> >>    */
> >>   struct k3_r5_cluster {
> >>      struct device *dev;
> >>      enum cluster_mode mode;
> >>      struct list_head cores;
> >> +    wait_queue_head_t core_transition;
> >>      const struct k3_r5_soc_data *soc_data;
> >>   };
> >>
> >> @@ -128,6 +130,7 @@ struct k3_r5_cluster {
> >>    * @atcm_enable: flag to control ATCM enablement
> >>    * @btcm_enable: flag to control BTCM enablement
> >>    * @loczrama: flag to dictate which TCM is at device address 0x0
> >> + * @released_from_reset: flag to signal when core is out of reset
> >>    */
> >>   struct k3_r5_core {
> >>      struct list_head elem;
> >> @@ -144,6 +147,7 @@ struct k3_r5_core {
> >>      u32 atcm_enable;
> >>      u32 btcm_enable;
> >>      u32 loczrama;
> >> +    bool released_from_reset;
> >>   };
> >>
> >>   /**
> >> @@ -460,6 +464,8 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
> >>                      ret);
> >>              return ret;
> >>      }
> >> +    core->released_from_reset = true;
> >> +    wake_up_interruptible(&cluster->core_transition);
> >>
> >>      /*
> >>       * Newer IP revisions like on J7200 SoCs support h/w auto-initialization
> >> @@ -1140,6 +1146,7 @@ static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc)
> >>              return ret;
> >>      }
> >>
> >> +    core->released_from_reset = c_state;
> >>      ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl,
> >>                                   &stat);
> >>      if (ret < 0) {
> >> @@ -1280,6 +1287,21 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
> >>                  cluster->mode == CLUSTER_MODE_SINGLECPU ||
> >>                  cluster->mode == CLUSTER_MODE_SINGLECORE)
> >>                      break;
> >> +
> >> +            /* R5 cores require to be powered on sequentially, core0
> >> +             * should be in higher power state than core1 in a cluster
> >> +             * So, wait for current core to power up before proceeding
> >> +             * to next core and put timeout of 2sec for each core.
> >> +             */
> > Wrong multi-line comment format.
> Okay will fix this.
> >> +            ret = wait_event_interruptible_timeout(cluster->core_transition,
> >> +                                                   core->released_from_reset,
> >> +                                                   msecs_to_jiffies(2000));
> >> +            if (ret <= 0) {
> >> +                    dev_err(dev,
> >> +                            "Timed out waiting for %s core to power up!\n",
> >> +                            rproc->name);
> >> +                    return ret;
> >> +            }
> >  From my perspective, this is needed because rproc_auto_boot_callback() for core1
> > can be called before core0 due to thread execution order.  Am I correct?
> Yes
> > If so please add this explanation to the comment you have above.  Also, let's
> > say a user decides to switch both cores off after reboot.  At that time, what
> > prevents a user from switching on core1 before core0 via sysfs?
> Okay, will add the explanation.
> Currently, adding support for graceful shutdown is in progress. As of
> now in order
> to stop/start core or change firmware, we recommend users to restart the
> OS.

You will need to address access via debugfs and sysfs if you want this
patch to move forward.

> > Thanks,
> > Mathieu
> >
> >>      }
> >>
> >>      return 0;
> >> @@ -1709,6 +1731,7 @@ static int k3_r5_probe(struct platform_device *pdev)
> >>      cluster->dev = dev;
> >>      cluster->soc_data = data;
> >>      INIT_LIST_HEAD(&cluster->cores);
> >> +    init_waitqueue_head(&cluster->core_transition);
> >>
> >>      ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode);
> >>      if (ret < 0 && ret != -EINVAL) {
> >> --
> >> 2.34.1
> >>
diff mbox series

Patch

diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
index ad3415a3851b..ba5e503f7c9c 100644
--- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
@@ -103,12 +103,14 @@  struct k3_r5_soc_data {
  * @dev: cached device pointer
  * @mode: Mode to configure the Cluster - Split or LockStep
  * @cores: list of R5 cores within the cluster
+ * @core_transition: wait queue to sync core state changes
  * @soc_data: SoC-specific feature data for a R5FSS
  */
 struct k3_r5_cluster {
 	struct device *dev;
 	enum cluster_mode mode;
 	struct list_head cores;
+	wait_queue_head_t core_transition;
 	const struct k3_r5_soc_data *soc_data;
 };
 
@@ -128,6 +130,7 @@  struct k3_r5_cluster {
  * @atcm_enable: flag to control ATCM enablement
  * @btcm_enable: flag to control BTCM enablement
  * @loczrama: flag to dictate which TCM is at device address 0x0
+ * @released_from_reset: flag to signal when core is out of reset
  */
 struct k3_r5_core {
 	struct list_head elem;
@@ -144,6 +147,7 @@  struct k3_r5_core {
 	u32 atcm_enable;
 	u32 btcm_enable;
 	u32 loczrama;
+	bool released_from_reset;
 };
 
 /**
@@ -460,6 +464,8 @@  static int k3_r5_rproc_prepare(struct rproc *rproc)
 			ret);
 		return ret;
 	}
+	core->released_from_reset = true;
+	wake_up_interruptible(&cluster->core_transition);
 
 	/*
 	 * Newer IP revisions like on J7200 SoCs support h/w auto-initialization
@@ -1140,6 +1146,7 @@  static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc)
 		return ret;
 	}
 
+	core->released_from_reset = c_state;
 	ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl,
 				     &stat);
 	if (ret < 0) {
@@ -1280,6 +1287,21 @@  static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
 		    cluster->mode == CLUSTER_MODE_SINGLECPU ||
 		    cluster->mode == CLUSTER_MODE_SINGLECORE)
 			break;
+
+		/* R5 cores require to be powered on sequentially, core0
+		 * should be in higher power state than core1 in a cluster
+		 * So, wait for current core to power up before proceeding
+		 * to next core and put timeout of 2sec for each core.
+		 */
+		ret = wait_event_interruptible_timeout(cluster->core_transition,
+						       core->released_from_reset,
+						       msecs_to_jiffies(2000));
+		if (ret <= 0) {
+			dev_err(dev,
+				"Timed out waiting for %s core to power up!\n",
+				rproc->name);
+			return ret;
+		}
 	}
 
 	return 0;
@@ -1709,6 +1731,7 @@  static int k3_r5_probe(struct platform_device *pdev)
 	cluster->dev = dev;
 	cluster->soc_data = data;
 	INIT_LIST_HEAD(&cluster->cores);
+	init_waitqueue_head(&cluster->core_transition);
 
 	ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode);
 	if (ret < 0 && ret != -EINVAL) {