diff mbox series

[v2,1/4] remoteproc: sysmon: Ensure remote notification ordering

Message ID 20201105045051.1365780-2-bjorn.andersson@linaro.org (mailing list archive)
State Superseded
Headers show
Series remoteproc: Improvement for the Qualcomm sysmon | expand

Commit Message

Bjorn Andersson Nov. 5, 2020, 4:50 a.m. UTC
The reliance on the remoteproc's state for determining when to send
sysmon notifications to a remote processor is racy with regard to
concurrent remoteproc operations.

Further more the advertisement of the state of other remote processor to
a newly started remote processor might not only send the wrong state,
but might result in a stream of state changes that are out of order.

Address this by introducing state tracking within the sysmon instances
themselves and extend the locking to ensure that the notifications are
consistent with this state.

Fixes: 1f36ab3f6e3b ("remoteproc: sysmon: Inform current rproc about all active rprocs")
Fixes: 1877f54f75ad ("remoteproc: sysmon: Add notifications for events")
Fixes: 1fb82ee806d1 ("remoteproc: qcom: Introduce sysmon")
Cc: stable@vger.kernel.org
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---

Changes since v1:
- Reduced the locking to be per sysmon instance
- Dropped unused local "rproc" variable in sysmon_notify()

 drivers/remoteproc/qcom_sysmon.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

Comments

Rishabh Bhatnagar Nov. 11, 2020, 12:57 a.m. UTC | #1
On 2020-11-04 20:50, Bjorn Andersson wrote:
> The reliance on the remoteproc's state for determining when to send
> sysmon notifications to a remote processor is racy with regard to
> concurrent remoteproc operations.
> 
> Further more the advertisement of the state of other remote processor 
> to
> a newly started remote processor might not only send the wrong state,
> but might result in a stream of state changes that are out of order.
> 
> Address this by introducing state tracking within the sysmon instances
> themselves and extend the locking to ensure that the notifications are
> consistent with this state.
> 
> Fixes: 1f36ab3f6e3b ("remoteproc: sysmon: Inform current rproc about
> all active rprocs")
> Fixes: 1877f54f75ad ("remoteproc: sysmon: Add notifications for 
> events")
> Fixes: 1fb82ee806d1 ("remoteproc: qcom: Introduce sysmon")
> Cc: stable@vger.kernel.org
> Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
> ---
> 
> Changes since v1:
> - Reduced the locking to be per sysmon instance
> - Dropped unused local "rproc" variable in sysmon_notify()
> 
>  drivers/remoteproc/qcom_sysmon.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/remoteproc/qcom_sysmon.c 
> b/drivers/remoteproc/qcom_sysmon.c
> index 9eb2f6bccea6..38f63c968fa8 100644
> --- a/drivers/remoteproc/qcom_sysmon.c
> +++ b/drivers/remoteproc/qcom_sysmon.c
> @@ -22,6 +22,9 @@ struct qcom_sysmon {
>  	struct rproc_subdev subdev;
>  	struct rproc *rproc;
> 
> +	int state;
> +	struct mutex state_lock;
> +
>  	struct list_head node;
> 
>  	const char *name;
> @@ -448,7 +451,10 @@ static int sysmon_prepare(struct rproc_subdev 
> *subdev)
>  		.ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
>  	};
> 
> +	mutex_lock(&sysmon->state_lock);
> +	sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
>  	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> +	mutex_unlock(&sysmon->state_lock);
> 
>  	return 0;
>  }
> @@ -472,22 +478,25 @@ static int sysmon_start(struct rproc_subdev 
> *subdev)
>  		.ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
>  	};
> 
> +	mutex_lock(&sysmon->state_lock);
> +	sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
>  	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> +	mutex_unlock(&sysmon->state_lock);
> 
> -	mutex_lock(&sysmon_lock);

We should keep the sysmon_lock to make sure sysmon_list is not modified
at the time we are doing this operation?
>  	list_for_each_entry(target, &sysmon_list, node) {
> -		if (target == sysmon ||
> -		    target->rproc->state != RPROC_RUNNING)
> +		if (target == sysmon)
>  			continue;
> 
> +		mutex_lock(&target->state_lock);
>  		event.subsys_name = target->name;
> +		event.ssr_event = target->state;

Is it better to only send this event when target->state is 
"SSCTL_SSR_EVENT_AFTER_POWERUP"?
> 
>  		if (sysmon->ssctl_version == 2)
>  			ssctl_send_event(sysmon, &event);
>  		else if (sysmon->ept)
>  			sysmon_send_event(sysmon, &event);
> +		mutex_unlock(&target->state_lock);
>  	}
> -	mutex_unlock(&sysmon_lock);
> 
>  	return 0;
>  }
> @@ -500,7 +509,10 @@ static void sysmon_stop(struct rproc_subdev
> *subdev, bool crashed)
>  		.ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
>  	};
> 
> +	mutex_lock(&sysmon->state_lock);
> +	sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
>  	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> +	mutex_unlock(&sysmon->state_lock);
> 
>  	/* Don't request graceful shutdown if we've crashed */
>  	if (crashed)
> @@ -521,7 +533,10 @@ static void sysmon_unprepare(struct rproc_subdev 
> *subdev)
>  		.ssr_event = SSCTL_SSR_EVENT_AFTER_SHUTDOWN
>  	};
> 
> +	mutex_lock(&sysmon->state_lock);
> +	sysmon->state = SSCTL_SSR_EVENT_AFTER_SHUTDOWN;
>  	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> +	mutex_unlock(&sysmon->state_lock);
>  }
> 
>  /**
> @@ -534,11 +549,10 @@ static int sysmon_notify(struct notifier_block
> *nb, unsigned long event,
>  			 void *data)
>  {
>  	struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, 
> nb);
> -	struct rproc *rproc = sysmon->rproc;
>  	struct sysmon_event *sysmon_event = data;
> 
>  	/* Skip non-running rprocs and the originating instance */
> -	if (rproc->state != RPROC_RUNNING ||
> +	if (sysmon->state != SSCTL_SSR_EVENT_AFTER_POWERUP ||
>  	    !strcmp(sysmon_event->subsys_name, sysmon->name)) {
>  		dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
>  		return NOTIFY_DONE;
> @@ -591,6 +605,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct
> rproc *rproc,
>  	init_completion(&sysmon->ind_comp);
>  	init_completion(&sysmon->shutdown_comp);
>  	mutex_init(&sysmon->lock);
> +	mutex_init(&sysmon->state_lock);
> 
>  	sysmon->shutdown_irq = of_irq_get_byname(sysmon->dev->of_node,
>  						 "shutdown-ack");
Bjorn Andersson Nov. 11, 2020, 5:40 a.m. UTC | #2
On Tue 10 Nov 18:57 CST 2020, rishabhb@codeaurora.org wrote:

> On 2020-11-04 20:50, Bjorn Andersson wrote:
> > The reliance on the remoteproc's state for determining when to send
> > sysmon notifications to a remote processor is racy with regard to
> > concurrent remoteproc operations.
> > 
> > Further more the advertisement of the state of other remote processor to
> > a newly started remote processor might not only send the wrong state,
> > but might result in a stream of state changes that are out of order.
> > 
> > Address this by introducing state tracking within the sysmon instances
> > themselves and extend the locking to ensure that the notifications are
> > consistent with this state.
> > 
> > Fixes: 1f36ab3f6e3b ("remoteproc: sysmon: Inform current rproc about
> > all active rprocs")
> > Fixes: 1877f54f75ad ("remoteproc: sysmon: Add notifications for events")
> > Fixes: 1fb82ee806d1 ("remoteproc: qcom: Introduce sysmon")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
> > ---
> > 
> > Changes since v1:
> > - Reduced the locking to be per sysmon instance
> > - Dropped unused local "rproc" variable in sysmon_notify()
> > 
> >  drivers/remoteproc/qcom_sysmon.c | 27 +++++++++++++++++++++------
> >  1 file changed, 21 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/remoteproc/qcom_sysmon.c
> > b/drivers/remoteproc/qcom_sysmon.c
> > index 9eb2f6bccea6..38f63c968fa8 100644
> > --- a/drivers/remoteproc/qcom_sysmon.c
> > +++ b/drivers/remoteproc/qcom_sysmon.c
> > @@ -22,6 +22,9 @@ struct qcom_sysmon {
> >  	struct rproc_subdev subdev;
> >  	struct rproc *rproc;
> > 
> > +	int state;
> > +	struct mutex state_lock;
> > +
> >  	struct list_head node;
> > 
> >  	const char *name;
> > @@ -448,7 +451,10 @@ static int sysmon_prepare(struct rproc_subdev
> > *subdev)
> >  		.ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
> >  	};
> > 
> > +	mutex_lock(&sysmon->state_lock);
> > +	sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
> >  	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> > +	mutex_unlock(&sysmon->state_lock);
> > 
> >  	return 0;
> >  }
> > @@ -472,22 +478,25 @@ static int sysmon_start(struct rproc_subdev
> > *subdev)
> >  		.ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
> >  	};
> > 
> > +	mutex_lock(&sysmon->state_lock);
> > +	sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
> >  	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
> > +	mutex_unlock(&sysmon->state_lock);
> > 
> > -	mutex_lock(&sysmon_lock);
> 
> We should keep the sysmon_lock to make sure sysmon_list is not modified
> at the time we are doing this operation?

Yes, that seems like a very good idea. I will review and update.

> >  	list_for_each_entry(target, &sysmon_list, node) {
> > -		if (target == sysmon ||
> > -		    target->rproc->state != RPROC_RUNNING)
> > +		if (target == sysmon)
> >  			continue;
> > 
> > +		mutex_lock(&target->state_lock);
> >  		event.subsys_name = target->name;
> > +		event.ssr_event = target->state;
> 
> Is it better to only send this event when target->state is
> "SSCTL_SSR_EVENT_AFTER_POWERUP"?

It depends on what the remote's requirements, I tested this and didn't
see any problems sending both SSCTL_SSR_EVENT_AFTER_POWERUP and
SSCTL_SSR_EVENT_AFTER_SHUTDOWN at least...
I don't know if I managed to hit a case where I sent any of the
intermediate states.

If you could provide some more input here I would appreciate it -
although I would be happy to merge the patch after fixing above locking
issue and then we can limit the events sent once we have a more detailed
answer, if that helps.

Regards,
Bjorn
diff mbox series

Patch

diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
index 9eb2f6bccea6..38f63c968fa8 100644
--- a/drivers/remoteproc/qcom_sysmon.c
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -22,6 +22,9 @@  struct qcom_sysmon {
 	struct rproc_subdev subdev;
 	struct rproc *rproc;
 
+	int state;
+	struct mutex state_lock;
+
 	struct list_head node;
 
 	const char *name;
@@ -448,7 +451,10 @@  static int sysmon_prepare(struct rproc_subdev *subdev)
 		.ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
 	};
 
+	mutex_lock(&sysmon->state_lock);
+	sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
 	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+	mutex_unlock(&sysmon->state_lock);
 
 	return 0;
 }
@@ -472,22 +478,25 @@  static int sysmon_start(struct rproc_subdev *subdev)
 		.ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
 	};
 
+	mutex_lock(&sysmon->state_lock);
+	sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
 	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+	mutex_unlock(&sysmon->state_lock);
 
-	mutex_lock(&sysmon_lock);
 	list_for_each_entry(target, &sysmon_list, node) {
-		if (target == sysmon ||
-		    target->rproc->state != RPROC_RUNNING)
+		if (target == sysmon)
 			continue;
 
+		mutex_lock(&target->state_lock);
 		event.subsys_name = target->name;
+		event.ssr_event = target->state;
 
 		if (sysmon->ssctl_version == 2)
 			ssctl_send_event(sysmon, &event);
 		else if (sysmon->ept)
 			sysmon_send_event(sysmon, &event);
+		mutex_unlock(&target->state_lock);
 	}
-	mutex_unlock(&sysmon_lock);
 
 	return 0;
 }
@@ -500,7 +509,10 @@  static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
 		.ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
 	};
 
+	mutex_lock(&sysmon->state_lock);
+	sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
 	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+	mutex_unlock(&sysmon->state_lock);
 
 	/* Don't request graceful shutdown if we've crashed */
 	if (crashed)
@@ -521,7 +533,10 @@  static void sysmon_unprepare(struct rproc_subdev *subdev)
 		.ssr_event = SSCTL_SSR_EVENT_AFTER_SHUTDOWN
 	};
 
+	mutex_lock(&sysmon->state_lock);
+	sysmon->state = SSCTL_SSR_EVENT_AFTER_SHUTDOWN;
 	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+	mutex_unlock(&sysmon->state_lock);
 }
 
 /**
@@ -534,11 +549,10 @@  static int sysmon_notify(struct notifier_block *nb, unsigned long event,
 			 void *data)
 {
 	struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb);
-	struct rproc *rproc = sysmon->rproc;
 	struct sysmon_event *sysmon_event = data;
 
 	/* Skip non-running rprocs and the originating instance */
-	if (rproc->state != RPROC_RUNNING ||
+	if (sysmon->state != SSCTL_SSR_EVENT_AFTER_POWERUP ||
 	    !strcmp(sysmon_event->subsys_name, sysmon->name)) {
 		dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
 		return NOTIFY_DONE;
@@ -591,6 +605,7 @@  struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
 	init_completion(&sysmon->ind_comp);
 	init_completion(&sysmon->shutdown_comp);
 	mutex_init(&sysmon->lock);
+	mutex_init(&sysmon->state_lock);
 
 	sysmon->shutdown_irq = of_irq_get_byname(sysmon->dev->of_node,
 						 "shutdown-ack");