diff mbox series

remoteproc: qcom: Add NOTIFY_FATAL event type to SSR subdevice

Message ID 20230503062146.3891-1-quic_viswanat@quicinc.com (mailing list archive)
State Changes Requested
Headers show
Series remoteproc: qcom: Add NOTIFY_FATAL event type to SSR subdevice | expand

Commit Message

Vignesh Viswanathan May 3, 2023, 6:21 a.m. UTC
Currently the SSR subdevice notifies the client driver on crash of the
rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
However the client driver might be interested to know that the device
has crashed immediately to pause any further transactions with the
rproc. This calls for an event to be sent to the driver in the IRQ
context as soon as the rproc crashes.

Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
crashed to the client driver.

Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
and ensuring the registered notifier function receives the notification
in IRQ context.

Signed-off-by: Vignesh Viswanathan <quic_viswanat@quicinc.com>
---
 drivers/remoteproc/qcom_common.c      | 60 +++++++++++++++++++++++++++
 drivers/remoteproc/remoteproc_core.c  | 12 ++++++
 include/linux/remoteproc.h            |  3 ++
 include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
 4 files changed, 92 insertions(+)

Comments

Mukesh Ojha May 3, 2023, 11:26 a.m. UTC | #1
On 5/3/2023 11:51 AM, Vignesh Viswanathan wrote:
> Currently the SSR subdevice notifies the client driver on crash of the
> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
> However the client driver might be interested to know that the device
> has crashed immediately to pause any further transactions with the
> rproc. This calls for an event to be sent to the driver in the IRQ
> context as soon as the rproc crashes.
> 
> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
> crashed to the client driver.
> 
> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
> and ensuring the registered notifier function receives the notification
> in IRQ context.

This was one of valid use case we encounter in android, We have some 
other way of doing the same thing without core kernel change with
something called early notifiers.

https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2

https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744

But good to address this if possible.

-Mukesh
> 
> Signed-off-by: Vignesh Viswanathan <quic_viswanat@quicinc.com>
> ---
>   drivers/remoteproc/qcom_common.c      | 60 +++++++++++++++++++++++++++
>   drivers/remoteproc/remoteproc_core.c  | 12 ++++++
>   include/linux/remoteproc.h            |  3 ++
>   include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
>   4 files changed, 92 insertions(+)
> 
> diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
> index a0d4238492e9..76542229aeb6 100644
> --- a/drivers/remoteproc/qcom_common.c
> +++ b/drivers/remoteproc/qcom_common.c
> @@ -84,6 +84,7 @@ struct minidump_global_toc {
>   struct qcom_ssr_subsystem {
>   	const char *name;
>   	struct srcu_notifier_head notifier_list;
> +	struct atomic_notifier_head atomic_notifier_list;
>   	struct list_head list;
>   };
>   
> @@ -366,6 +367,7 @@ static struct qcom_ssr_subsystem *qcom_ssr_get_subsys(const char *name)
>   	}
>   	info->name = kstrdup_const(name, GFP_KERNEL);
>   	srcu_init_notifier_head(&info->notifier_list);
> +	ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>   
>   	/* Add to global notification list */
>   	list_add_tail(&info->list, &qcom_ssr_subsystem_list);
> @@ -417,6 +419,51 @@ int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb)
>   }
>   EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>   
> +/**
> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic notification
> + *					 handler
> + * @name:	Subsystem's SSR name
> + * @nb:	notifier_block to be invoked upon subsystem's state change
> + *
> + * This registers the @nb notifier block as part the atomic notifier
> + * chain for a remoteproc associated with @name. The notifier block's callback
> + * will be invoked when the remote processor crashes in atomic context before
> + * the recovery process is queued.
> + *
> + * Return: a subsystem cookie on success, ERR_PTR on failure.
> + */
> +void *qcom_register_ssr_atomic_notifier(const char *name,
> +					struct notifier_block *nb)
> +{
> +	struct qcom_ssr_subsystem *info;
> +
> +	info = qcom_ssr_get_subsys(name);
> +	if (IS_ERR(info))
> +		return info;
> +
> +	atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
> +
> +	return &info->atomic_notifier_list;
> +}
> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
> +
> +/**
> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic notification
> + *					   handler
> + * @notify:	subsystem cookie returned from qcom_register_ssr_notifier
> + * @nb:		notifier_block to unregister
> + *
> + * This function will unregister the notifier from the atomic notifier
> + * chain.
> + *
> + * Return: 0 on success, %ENOENT otherwise.
> + */
> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct notifier_block *nb)
> +{
> +	return atomic_notifier_chain_unregister(notify, nb);
> +}
> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
> +
>   static int ssr_notify_prepare(struct rproc_subdev *subdev)
>   {
>   	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
> @@ -467,6 +514,18 @@ static void ssr_notify_unprepare(struct rproc_subdev *subdev)
>   				 QCOM_SSR_AFTER_SHUTDOWN, &data);
>   }
>   
> +static void ssr_notify_crash(struct rproc_subdev *subdev)
> +{
> +	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
> +	struct qcom_ssr_notify_data data = {
> +		.name = ssr->info->name,
> +		.crashed = true,
> +	};
> +
> +	atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
> +				   QCOM_SSR_NOTIFY_CRASH, &data);
> +}
> +
>   /**
>    * qcom_add_ssr_subdev() - register subdevice as restart notification source
>    * @rproc:	rproc handle
> @@ -493,6 +552,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
>   	ssr->subdev.start = ssr_notify_start;
>   	ssr->subdev.stop = ssr_notify_stop;
>   	ssr->subdev.unprepare = ssr_notify_unprepare;
> +	ssr->subdev.notify_crash = ssr_notify_crash;
>   
>   	rproc_add_subdev(rproc, &ssr->subdev);
>   }
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 695cce218e8c..3de0ece158ea 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1139,6 +1139,16 @@ static void rproc_unprepare_subdevices(struct rproc *rproc)
>   	}
>   }
>   
> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
> +{
> +	struct rproc_subdev *subdev;
> +
> +	list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
> +		if (subdev->notify_crash)
> +			subdev->notify_crash(subdev);
> +	}
> +}
> +
>   /**
>    * rproc_alloc_registered_carveouts() - allocate all carveouts registered
>    * in the list
> @@ -2687,6 +2697,8 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>   	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>   		rproc->name, rproc_crash_to_string(type));
>   
> +	rproc_notify_crash_subdevices(rproc);
> +
>   	queue_work(rproc_recovery_wq, &rproc->crash_handler);
>   }
>   EXPORT_SYMBOL(rproc_report_crash);
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index fe8978eb69f1..f3c0e0103e81 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -596,6 +596,8 @@ struct rproc {
>    * @stop: stop function, called before the rproc is stopped; the @crashed
>    *	    parameter indicates if this originates from a recovery
>    * @unprepare: unprepare function, called after the rproc has been stopped
> + * @notify_crash: notify_crash function, called in atomic context to notify
> + *		  rproc has crashed and recovery is about to start
>    */
>   struct rproc_subdev {
>   	struct list_head node;
> @@ -604,6 +606,7 @@ struct rproc_subdev {
>   	int (*start)(struct rproc_subdev *subdev);
>   	void (*stop)(struct rproc_subdev *subdev, bool crashed);
>   	void (*unprepare)(struct rproc_subdev *subdev);
> +	void (*notify_crash)(struct rproc_subdev *subdev);
>   };
>   
>   /* we currently support only two vrings per rvdev */
> diff --git a/include/linux/remoteproc/qcom_rproc.h b/include/linux/remoteproc/qcom_rproc.h
> index 82b211518136..f3d06900f297 100644
> --- a/include/linux/remoteproc/qcom_rproc.h
> +++ b/include/linux/remoteproc/qcom_rproc.h
> @@ -11,12 +11,14 @@ struct notifier_block;
>    * @QCOM_SSR_AFTER_POWERUP:	Remoteproc is running (start stage)
>    * @QCOM_SSR_BEFORE_SHUTDOWN:	Remoteproc crashed or shutting down (stop stage)
>    * @QCOM_SSR_AFTER_SHUTDOWN:	Remoteproc is down (unprepare stage)
> + * @QCOM_SSR_NOTIFY_CRASH:	Remoteproc crashed
>    */
>   enum qcom_ssr_notify_type {
>   	QCOM_SSR_BEFORE_POWERUP,
>   	QCOM_SSR_AFTER_POWERUP,
>   	QCOM_SSR_BEFORE_SHUTDOWN,
>   	QCOM_SSR_AFTER_SHUTDOWN,
> +	QCOM_SSR_NOTIFY_CRASH,
>   };
>   
>   struct qcom_ssr_notify_data {
> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
>   void *qcom_register_ssr_notifier(const char *name, struct notifier_block *nb);
>   int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb);
>   
> +void *qcom_register_ssr_atomic_notifier(const char *name,
> +					struct notifier_block *nb);
> +int qcom_unregister_ssr_atomic_notifier(void *notify,
> +					struct notifier_block *nb);
>   #else
>   
>   static inline void *qcom_register_ssr_notifier(const char *name,
> @@ -43,6 +49,17 @@ static inline int qcom_unregister_ssr_notifier(void *notify,
>   	return 0;
>   }
>   
> +static inline void *qcom_register_ssr_atomic_notifier(const char *name,
> +						      struct notifier_block *nb)
> +{
> +	return 0;
> +}
> +
> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
> +						      struct notifier_block *nb)
> +{
> +	return 0;
> +}
>   #endif
>   
>   #endif
Mukesh Ojha May 22, 2023, 9:33 a.m. UTC | #2
On 5/3/2023 4:56 PM, Mukesh Ojha wrote:
> 
> 
> On 5/3/2023 11:51 AM, Vignesh Viswanathan wrote:
>> Currently the SSR subdevice notifies the client driver on crash of the
>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>> However the client driver might be interested to know that the device
>> has crashed immediately to pause any further transactions with the
>> rproc. This calls for an event to be sent to the driver in the IRQ
>> context as soon as the rproc crashes.
>>
>> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
>> crashed to the client driver.
>>
>> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
>> and ensuring the registered notifier function receives the notification
>> in IRQ context.
> 
> This was one of valid use case we encounter in android, We have some 
> other way of doing the same thing without core kernel change with
> something called early notifiers.
> 
> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
> 
> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744
> 
> But good to address this if possible.

Ack the idea of early notifier;
But here, atomic does not guarantees it to be atomic.

Acked-by: Mukesh Ojha <quic_mojha@quicinc.com>

-- Mukesh


> 
> -Mukesh
>>
>> Signed-off-by: Vignesh Viswanathan <quic_viswanat@quicinc.com>
>> ---
>>   drivers/remoteproc/qcom_common.c      | 60 +++++++++++++++++++++++++++
>>   drivers/remoteproc/remoteproc_core.c  | 12 ++++++
>>   include/linux/remoteproc.h            |  3 ++
>>   include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
>>   4 files changed, 92 insertions(+)
>>
>> diff --git a/drivers/remoteproc/qcom_common.c 
>> b/drivers/remoteproc/qcom_common.c
>> index a0d4238492e9..76542229aeb6 100644
>> --- a/drivers/remoteproc/qcom_common.c
>> +++ b/drivers/remoteproc/qcom_common.c
>> @@ -84,6 +84,7 @@ struct minidump_global_toc {
>>   struct qcom_ssr_subsystem {
>>       const char *name;
>>       struct srcu_notifier_head notifier_list;
>> +    struct atomic_notifier_head atomic_notifier_list;
>>       struct list_head list;
>>   };
>> @@ -366,6 +367,7 @@ static struct qcom_ssr_subsystem 
>> *qcom_ssr_get_subsys(const char *name)
>>       }
>>       info->name = kstrdup_const(name, GFP_KERNEL);
>>       srcu_init_notifier_head(&info->notifier_list);
>> +    ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>>       /* Add to global notification list */
>>       list_add_tail(&info->list, &qcom_ssr_subsystem_list);
>> @@ -417,6 +419,51 @@ int qcom_unregister_ssr_notifier(void *notify, 
>> struct notifier_block *nb)
>>   }
>>   EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>> +/**
>> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic 
>> notification
>> + *                     handler
>> + * @name:    Subsystem's SSR name
>> + * @nb:    notifier_block to be invoked upon subsystem's state change
>> + *
>> + * This registers the @nb notifier block as part the atomic notifier
>> + * chain for a remoteproc associated with @name. The notifier block's 
>> callback
>> + * will be invoked when the remote processor crashes in atomic 
>> context before
>> + * the recovery process is queued.
>> + *
>> + * Return: a subsystem cookie on success, ERR_PTR on failure.
>> + */
>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>> +                    struct notifier_block *nb)
>> +{
>> +    struct qcom_ssr_subsystem *info;
>> +
>> +    info = qcom_ssr_get_subsys(name);
>> +    if (IS_ERR(info))
>> +        return info;
>> +
>> +    atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
>> +
>> +    return &info->atomic_notifier_list;
>> +}
>> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
>> +
>> +/**
>> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic 
>> notification
>> + *                       handler
>> + * @notify:    subsystem cookie returned from qcom_register_ssr_notifier
>> + * @nb:        notifier_block to unregister
>> + *
>> + * This function will unregister the notifier from the atomic notifier
>> + * chain.
>> + *
>> + * Return: 0 on success, %ENOENT otherwise.
>> + */
>> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct 
>> notifier_block *nb)
>> +{
>> +    return atomic_notifier_chain_unregister(notify, nb);
>> +}
>> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
>> +
>>   static int ssr_notify_prepare(struct rproc_subdev *subdev)
>>   {
>>       struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>> @@ -467,6 +514,18 @@ static void ssr_notify_unprepare(struct 
>> rproc_subdev *subdev)
>>                    QCOM_SSR_AFTER_SHUTDOWN, &data);
>>   }
>> +static void ssr_notify_crash(struct rproc_subdev *subdev)
>> +{
>> +    struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>> +    struct qcom_ssr_notify_data data = {
>> +        .name = ssr->info->name,
>> +        .crashed = true,
>> +    };
>> +
>> +    atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
>> +                   QCOM_SSR_NOTIFY_CRASH, &data);
>> +}
>> +
>>   /**
>>    * qcom_add_ssr_subdev() - register subdevice as restart 
>> notification source
>>    * @rproc:    rproc handle
>> @@ -493,6 +552,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, 
>> struct qcom_rproc_ssr *ssr,
>>       ssr->subdev.start = ssr_notify_start;
>>       ssr->subdev.stop = ssr_notify_stop;
>>       ssr->subdev.unprepare = ssr_notify_unprepare;
>> +    ssr->subdev.notify_crash = ssr_notify_crash;
>>       rproc_add_subdev(rproc, &ssr->subdev);
>>   }
>> diff --git a/drivers/remoteproc/remoteproc_core.c 
>> b/drivers/remoteproc/remoteproc_core.c
>> index 695cce218e8c..3de0ece158ea 100644
>> --- a/drivers/remoteproc/remoteproc_core.c
>> +++ b/drivers/remoteproc/remoteproc_core.c
>> @@ -1139,6 +1139,16 @@ static void rproc_unprepare_subdevices(struct 
>> rproc *rproc)
>>       }
>>   }
>> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
>> +{
>> +    struct rproc_subdev *subdev;
>> +
>> +    list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
>> +        if (subdev->notify_crash)
>> +            subdev->notify_crash(subdev);
>> +    }
>> +}
>> +
>>   /**
>>    * rproc_alloc_registered_carveouts() - allocate all carveouts 
>> registered
>>    * in the list
>> @@ -2687,6 +2697,8 @@ void rproc_report_crash(struct rproc *rproc, 
>> enum rproc_crash_type type)
>>       dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>>           rproc->name, rproc_crash_to_string(type));
>> +    rproc_notify_crash_subdevices(rproc);
>> +
>>       queue_work(rproc_recovery_wq, &rproc->crash_handler);
>>   }
>>   EXPORT_SYMBOL(rproc_report_crash);
>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>> index fe8978eb69f1..f3c0e0103e81 100644
>> --- a/include/linux/remoteproc.h
>> +++ b/include/linux/remoteproc.h
>> @@ -596,6 +596,8 @@ struct rproc {
>>    * @stop: stop function, called before the rproc is stopped; the 
>> @crashed
>>    *        parameter indicates if this originates from a recovery
>>    * @unprepare: unprepare function, called after the rproc has been 
>> stopped
>> + * @notify_crash: notify_crash function, called in atomic context to 
>> notify
>> + *          rproc has crashed and recovery is about to start
>>    */
>>   struct rproc_subdev {
>>       struct list_head node;
>> @@ -604,6 +606,7 @@ struct rproc_subdev {
>>       int (*start)(struct rproc_subdev *subdev);
>>       void (*stop)(struct rproc_subdev *subdev, bool crashed);
>>       void (*unprepare)(struct rproc_subdev *subdev);
>> +    void (*notify_crash)(struct rproc_subdev *subdev);
>>   };
>>   /* we currently support only two vrings per rvdev */
>> diff --git a/include/linux/remoteproc/qcom_rproc.h 
>> b/include/linux/remoteproc/qcom_rproc.h
>> index 82b211518136..f3d06900f297 100644
>> --- a/include/linux/remoteproc/qcom_rproc.h
>> +++ b/include/linux/remoteproc/qcom_rproc.h
>> @@ -11,12 +11,14 @@ struct notifier_block;
>>    * @QCOM_SSR_AFTER_POWERUP:    Remoteproc is running (start stage)
>>    * @QCOM_SSR_BEFORE_SHUTDOWN:    Remoteproc crashed or shutting down 
>> (stop stage)
>>    * @QCOM_SSR_AFTER_SHUTDOWN:    Remoteproc is down (unprepare stage)
>> + * @QCOM_SSR_NOTIFY_CRASH:    Remoteproc crashed
>>    */
>>   enum qcom_ssr_notify_type {
>>       QCOM_SSR_BEFORE_POWERUP,
>>       QCOM_SSR_AFTER_POWERUP,
>>       QCOM_SSR_BEFORE_SHUTDOWN,
>>       QCOM_SSR_AFTER_SHUTDOWN,
>> +    QCOM_SSR_NOTIFY_CRASH,
>>   };
>>   struct qcom_ssr_notify_data {
>> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
>>   void *qcom_register_ssr_notifier(const char *name, struct 
>> notifier_block *nb);
>>   int qcom_unregister_ssr_notifier(void *notify, struct notifier_block 
>> *nb);
>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>> +                    struct notifier_block *nb);
>> +int qcom_unregister_ssr_atomic_notifier(void *notify,
>> +                    struct notifier_block *nb);
>>   #else
>>   static inline void *qcom_register_ssr_notifier(const char *name,
>> @@ -43,6 +49,17 @@ static inline int qcom_unregister_ssr_notifier(void 
>> *notify,
>>       return 0;
>>   }
>> +static inline void *qcom_register_ssr_atomic_notifier(const char *name,
>> +                              struct notifier_block *nb)
>> +{
>> +    return 0;
>> +}
>> +
>> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
>> +                              struct notifier_block *nb)
>> +{
>> +    return 0;
>> +}
>>   #endif
>>   #endif
>
Vignesh Viswanathan May 29, 2023, 3:57 a.m. UTC | #3
Gentle Reminder.

On 5/22/2023 3:03 PM, Mukesh Ojha wrote:
> 
> 
> On 5/3/2023 4:56 PM, Mukesh Ojha wrote:
>>
>>
>> On 5/3/2023 11:51 AM, Vignesh Viswanathan wrote:
>>> Currently the SSR subdevice notifies the client driver on crash of the
>>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>>> However the client driver might be interested to know that the device
>>> has crashed immediately to pause any further transactions with the
>>> rproc. This calls for an event to be sent to the driver in the IRQ
>>> context as soon as the rproc crashes.
>>>
>>> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
>>> crashed to the client driver.
>>>
>>> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
>>> and ensuring the registered notifier function receives the notification
>>> in IRQ context.
>>
>> This was one of valid use case we encounter in android, We have some 
>> other way of doing the same thing without core kernel change with
>> something called early notifiers.
>>
>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
>>
>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744
>>
>> But good to address this if possible.
> 
> Ack the idea of early notifier;
> But here, atomic does not guarantees it to be atomic.
> 
> Acked-by: Mukesh Ojha <quic_mojha@quicinc.com>
> 
> -- Mukesh
> 
> 
>>
>> -Mukesh
>>>
>>> Signed-off-by: Vignesh Viswanathan <quic_viswanat@quicinc.com>
>>> ---
>>>   drivers/remoteproc/qcom_common.c      | 60 +++++++++++++++++++++++++++
>>>   drivers/remoteproc/remoteproc_core.c  | 12 ++++++
>>>   include/linux/remoteproc.h            |  3 ++
>>>   include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
>>>   4 files changed, 92 insertions(+)
>>>
>>> diff --git a/drivers/remoteproc/qcom_common.c 
>>> b/drivers/remoteproc/qcom_common.c
>>> index a0d4238492e9..76542229aeb6 100644
>>> --- a/drivers/remoteproc/qcom_common.c
>>> +++ b/drivers/remoteproc/qcom_common.c
>>> @@ -84,6 +84,7 @@ struct minidump_global_toc {
>>>   struct qcom_ssr_subsystem {
>>>       const char *name;
>>>       struct srcu_notifier_head notifier_list;
>>> +    struct atomic_notifier_head atomic_notifier_list;
>>>       struct list_head list;
>>>   };
>>> @@ -366,6 +367,7 @@ static struct qcom_ssr_subsystem 
>>> *qcom_ssr_get_subsys(const char *name)
>>>       }
>>>       info->name = kstrdup_const(name, GFP_KERNEL);
>>>       srcu_init_notifier_head(&info->notifier_list);
>>> +    ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>>>       /* Add to global notification list */
>>>       list_add_tail(&info->list, &qcom_ssr_subsystem_list);
>>> @@ -417,6 +419,51 @@ int qcom_unregister_ssr_notifier(void *notify, 
>>> struct notifier_block *nb)
>>>   }
>>>   EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>>> +/**
>>> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic 
>>> notification
>>> + *                     handler
>>> + * @name:    Subsystem's SSR name
>>> + * @nb:    notifier_block to be invoked upon subsystem's state change
>>> + *
>>> + * This registers the @nb notifier block as part the atomic notifier
>>> + * chain for a remoteproc associated with @name. The notifier 
>>> block's callback
>>> + * will be invoked when the remote processor crashes in atomic 
>>> context before
>>> + * the recovery process is queued.
>>> + *
>>> + * Return: a subsystem cookie on success, ERR_PTR on failure.
>>> + */
>>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>>> +                    struct notifier_block *nb)
>>> +{
>>> +    struct qcom_ssr_subsystem *info;
>>> +
>>> +    info = qcom_ssr_get_subsys(name);
>>> +    if (IS_ERR(info))
>>> +        return info;
>>> +
>>> +    atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
>>> +
>>> +    return &info->atomic_notifier_list;
>>> +}
>>> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
>>> +
>>> +/**
>>> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic 
>>> notification
>>> + *                       handler
>>> + * @notify:    subsystem cookie returned from 
>>> qcom_register_ssr_notifier
>>> + * @nb:        notifier_block to unregister
>>> + *
>>> + * This function will unregister the notifier from the atomic notifier
>>> + * chain.
>>> + *
>>> + * Return: 0 on success, %ENOENT otherwise.
>>> + */
>>> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct 
>>> notifier_block *nb)
>>> +{
>>> +    return atomic_notifier_chain_unregister(notify, nb);
>>> +}
>>> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
>>> +
>>>   static int ssr_notify_prepare(struct rproc_subdev *subdev)
>>>   {
>>>       struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>>> @@ -467,6 +514,18 @@ static void ssr_notify_unprepare(struct 
>>> rproc_subdev *subdev)
>>>                    QCOM_SSR_AFTER_SHUTDOWN, &data);
>>>   }
>>> +static void ssr_notify_crash(struct rproc_subdev *subdev)
>>> +{
>>> +    struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>>> +    struct qcom_ssr_notify_data data = {
>>> +        .name = ssr->info->name,
>>> +        .crashed = true,
>>> +    };
>>> +
>>> +    atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
>>> +                   QCOM_SSR_NOTIFY_CRASH, &data);
>>> +}
>>> +
>>>   /**
>>>    * qcom_add_ssr_subdev() - register subdevice as restart 
>>> notification source
>>>    * @rproc:    rproc handle
>>> @@ -493,6 +552,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, 
>>> struct qcom_rproc_ssr *ssr,
>>>       ssr->subdev.start = ssr_notify_start;
>>>       ssr->subdev.stop = ssr_notify_stop;
>>>       ssr->subdev.unprepare = ssr_notify_unprepare;
>>> +    ssr->subdev.notify_crash = ssr_notify_crash;
>>>       rproc_add_subdev(rproc, &ssr->subdev);
>>>   }
>>> diff --git a/drivers/remoteproc/remoteproc_core.c 
>>> b/drivers/remoteproc/remoteproc_core.c
>>> index 695cce218e8c..3de0ece158ea 100644
>>> --- a/drivers/remoteproc/remoteproc_core.c
>>> +++ b/drivers/remoteproc/remoteproc_core.c
>>> @@ -1139,6 +1139,16 @@ static void rproc_unprepare_subdevices(struct 
>>> rproc *rproc)
>>>       }
>>>   }
>>> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
>>> +{
>>> +    struct rproc_subdev *subdev;
>>> +
>>> +    list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
>>> +        if (subdev->notify_crash)
>>> +            subdev->notify_crash(subdev);
>>> +    }
>>> +}
>>> +
>>>   /**
>>>    * rproc_alloc_registered_carveouts() - allocate all carveouts 
>>> registered
>>>    * in the list
>>> @@ -2687,6 +2697,8 @@ void rproc_report_crash(struct rproc *rproc, 
>>> enum rproc_crash_type type)
>>>       dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>>>           rproc->name, rproc_crash_to_string(type));
>>> +    rproc_notify_crash_subdevices(rproc);
>>> +
>>>       queue_work(rproc_recovery_wq, &rproc->crash_handler);
>>>   }
>>>   EXPORT_SYMBOL(rproc_report_crash);
>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>> index fe8978eb69f1..f3c0e0103e81 100644
>>> --- a/include/linux/remoteproc.h
>>> +++ b/include/linux/remoteproc.h
>>> @@ -596,6 +596,8 @@ struct rproc {
>>>    * @stop: stop function, called before the rproc is stopped; the 
>>> @crashed
>>>    *        parameter indicates if this originates from a recovery
>>>    * @unprepare: unprepare function, called after the rproc has been 
>>> stopped
>>> + * @notify_crash: notify_crash function, called in atomic context to 
>>> notify
>>> + *          rproc has crashed and recovery is about to start
>>>    */
>>>   struct rproc_subdev {
>>>       struct list_head node;
>>> @@ -604,6 +606,7 @@ struct rproc_subdev {
>>>       int (*start)(struct rproc_subdev *subdev);
>>>       void (*stop)(struct rproc_subdev *subdev, bool crashed);
>>>       void (*unprepare)(struct rproc_subdev *subdev);
>>> +    void (*notify_crash)(struct rproc_subdev *subdev);
>>>   };
>>>   /* we currently support only two vrings per rvdev */
>>> diff --git a/include/linux/remoteproc/qcom_rproc.h 
>>> b/include/linux/remoteproc/qcom_rproc.h
>>> index 82b211518136..f3d06900f297 100644
>>> --- a/include/linux/remoteproc/qcom_rproc.h
>>> +++ b/include/linux/remoteproc/qcom_rproc.h
>>> @@ -11,12 +11,14 @@ struct notifier_block;
>>>    * @QCOM_SSR_AFTER_POWERUP:    Remoteproc is running (start stage)
>>>    * @QCOM_SSR_BEFORE_SHUTDOWN:    Remoteproc crashed or shutting 
>>> down (stop stage)
>>>    * @QCOM_SSR_AFTER_SHUTDOWN:    Remoteproc is down (unprepare stage)
>>> + * @QCOM_SSR_NOTIFY_CRASH:    Remoteproc crashed
>>>    */
>>>   enum qcom_ssr_notify_type {
>>>       QCOM_SSR_BEFORE_POWERUP,
>>>       QCOM_SSR_AFTER_POWERUP,
>>>       QCOM_SSR_BEFORE_SHUTDOWN,
>>>       QCOM_SSR_AFTER_SHUTDOWN,
>>> +    QCOM_SSR_NOTIFY_CRASH,
>>>   };
>>>   struct qcom_ssr_notify_data {
>>> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
>>>   void *qcom_register_ssr_notifier(const char *name, struct 
>>> notifier_block *nb);
>>>   int qcom_unregister_ssr_notifier(void *notify, struct 
>>> notifier_block *nb);
>>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>>> +                    struct notifier_block *nb);
>>> +int qcom_unregister_ssr_atomic_notifier(void *notify,
>>> +                    struct notifier_block *nb);
>>>   #else
>>>   static inline void *qcom_register_ssr_notifier(const char *name,
>>> @@ -43,6 +49,17 @@ static inline int 
>>> qcom_unregister_ssr_notifier(void *notify,
>>>       return 0;
>>>   }
>>> +static inline void *qcom_register_ssr_atomic_notifier(const char *name,
>>> +                              struct notifier_block *nb)
>>> +{
>>> +    return 0;
>>> +}
>>> +
>>> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
>>> +                              struct notifier_block *nb)
>>> +{
>>> +    return 0;
>>> +}
>>>   #endif
>>>   #endif
>>
Vignesh Viswanathan June 15, 2023, 10:40 a.m. UTC | #4
On 5/29/2023 9:27 AM, Vignesh Viswanathan wrote:
> Gentle Reminder.
> 
> On 5/22/2023 3:03 PM, Mukesh Ojha wrote:
>>
>>
>> On 5/3/2023 4:56 PM, Mukesh Ojha wrote:
>>>
>>>
>>> On 5/3/2023 11:51 AM, Vignesh Viswanathan wrote:
>>>> Currently the SSR subdevice notifies the client driver on crash of the
>>>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>>>> However the client driver might be interested to know that the device
>>>> has crashed immediately to pause any further transactions with the
>>>> rproc. This calls for an event to be sent to the driver in the IRQ
>>>> context as soon as the rproc crashes.
>>>>
>>>> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
>>>> crashed to the client driver.
>>>>
>>>> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to 
>>>> crash
>>>> and ensuring the registered notifier function receives the notification
>>>> in IRQ context.
>>>
>>> This was one of valid use case we encounter in android, We have some 
>>> other way of doing the same thing without core kernel change with
>>> something called early notifiers.
>>>
>>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
>>>
>>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744
>>>
>>> But good to address this if possible.
>>
>> Ack the idea of early notifier;
>> But here, atomic does not guarantees it to be atomic.
>>
>> Acked-by: Mukesh Ojha <quic_mojha@quicinc.com>
>>
>> -- Mukesh
>>
Gentle Reminder!

Thanks,
Vignesh

>>
>>>
>>> -Mukesh
>>>>
>>>> Signed-off-by: Vignesh Viswanathan <quic_viswanat@quicinc.com>
>>>> ---
>>>>   drivers/remoteproc/qcom_common.c      | 60 
>>>> +++++++++++++++++++++++++++
>>>>   drivers/remoteproc/remoteproc_core.c  | 12 ++++++
>>>>   include/linux/remoteproc.h            |  3 ++
>>>>   include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
>>>>   4 files changed, 92 insertions(+)
>>>>
>>>> diff --git a/drivers/remoteproc/qcom_common.c 
>>>> b/drivers/remoteproc/qcom_common.c
>>>> index a0d4238492e9..76542229aeb6 100644
>>>> --- a/drivers/remoteproc/qcom_common.c
>>>> +++ b/drivers/remoteproc/qcom_common.c
>>>> @@ -84,6 +84,7 @@ struct minidump_global_toc {
>>>>   struct qcom_ssr_subsystem {
>>>>       const char *name;
>>>>       struct srcu_notifier_head notifier_list;
>>>> +    struct atomic_notifier_head atomic_notifier_list;
>>>>       struct list_head list;
>>>>   };
>>>> @@ -366,6 +367,7 @@ static struct qcom_ssr_subsystem 
>>>> *qcom_ssr_get_subsys(const char *name)
>>>>       }
>>>>       info->name = kstrdup_const(name, GFP_KERNEL);
>>>>       srcu_init_notifier_head(&info->notifier_list);
>>>> +    ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>>>>       /* Add to global notification list */
>>>>       list_add_tail(&info->list, &qcom_ssr_subsystem_list);
>>>> @@ -417,6 +419,51 @@ int qcom_unregister_ssr_notifier(void *notify, 
>>>> struct notifier_block *nb)
>>>>   }
>>>>   EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>>>> +/**
>>>> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic 
>>>> notification
>>>> + *                     handler
>>>> + * @name:    Subsystem's SSR name
>>>> + * @nb:    notifier_block to be invoked upon subsystem's state change
>>>> + *
>>>> + * This registers the @nb notifier block as part the atomic notifier
>>>> + * chain for a remoteproc associated with @name. The notifier 
>>>> block's callback
>>>> + * will be invoked when the remote processor crashes in atomic 
>>>> context before
>>>> + * the recovery process is queued.
>>>> + *
>>>> + * Return: a subsystem cookie on success, ERR_PTR on failure.
>>>> + */
>>>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>>>> +                    struct notifier_block *nb)
>>>> +{
>>>> +    struct qcom_ssr_subsystem *info;
>>>> +
>>>> +    info = qcom_ssr_get_subsys(name);
>>>> +    if (IS_ERR(info))
>>>> +        return info;
>>>> +
>>>> +    atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
>>>> +
>>>> +    return &info->atomic_notifier_list;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
>>>> +
>>>> +/**
>>>> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic 
>>>> notification
>>>> + *                       handler
>>>> + * @notify:    subsystem cookie returned from 
>>>> qcom_register_ssr_notifier
>>>> + * @nb:        notifier_block to unregister
>>>> + *
>>>> + * This function will unregister the notifier from the atomic notifier
>>>> + * chain.
>>>> + *
>>>> + * Return: 0 on success, %ENOENT otherwise.
>>>> + */
>>>> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct 
>>>> notifier_block *nb)
>>>> +{
>>>> +    return atomic_notifier_chain_unregister(notify, nb);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
>>>> +
>>>>   static int ssr_notify_prepare(struct rproc_subdev *subdev)
>>>>   {
>>>>       struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>>>> @@ -467,6 +514,18 @@ static void ssr_notify_unprepare(struct 
>>>> rproc_subdev *subdev)
>>>>                    QCOM_SSR_AFTER_SHUTDOWN, &data);
>>>>   }
>>>> +static void ssr_notify_crash(struct rproc_subdev *subdev)
>>>> +{
>>>> +    struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>>>> +    struct qcom_ssr_notify_data data = {
>>>> +        .name = ssr->info->name,
>>>> +        .crashed = true,
>>>> +    };
>>>> +
>>>> +    atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
>>>> +                   QCOM_SSR_NOTIFY_CRASH, &data);
>>>> +}
>>>> +
>>>>   /**
>>>>    * qcom_add_ssr_subdev() - register subdevice as restart 
>>>> notification source
>>>>    * @rproc:    rproc handle
>>>> @@ -493,6 +552,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, 
>>>> struct qcom_rproc_ssr *ssr,
>>>>       ssr->subdev.start = ssr_notify_start;
>>>>       ssr->subdev.stop = ssr_notify_stop;
>>>>       ssr->subdev.unprepare = ssr_notify_unprepare;
>>>> +    ssr->subdev.notify_crash = ssr_notify_crash;
>>>>       rproc_add_subdev(rproc, &ssr->subdev);
>>>>   }
>>>> diff --git a/drivers/remoteproc/remoteproc_core.c 
>>>> b/drivers/remoteproc/remoteproc_core.c
>>>> index 695cce218e8c..3de0ece158ea 100644
>>>> --- a/drivers/remoteproc/remoteproc_core.c
>>>> +++ b/drivers/remoteproc/remoteproc_core.c
>>>> @@ -1139,6 +1139,16 @@ static void rproc_unprepare_subdevices(struct 
>>>> rproc *rproc)
>>>>       }
>>>>   }
>>>> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
>>>> +{
>>>> +    struct rproc_subdev *subdev;
>>>> +
>>>> +    list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
>>>> +        if (subdev->notify_crash)
>>>> +            subdev->notify_crash(subdev);
>>>> +    }
>>>> +}
>>>> +
>>>>   /**
>>>>    * rproc_alloc_registered_carveouts() - allocate all carveouts 
>>>> registered
>>>>    * in the list
>>>> @@ -2687,6 +2697,8 @@ void rproc_report_crash(struct rproc *rproc, 
>>>> enum rproc_crash_type type)
>>>>       dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>>>>           rproc->name, rproc_crash_to_string(type));
>>>> +    rproc_notify_crash_subdevices(rproc);
>>>> +
>>>>       queue_work(rproc_recovery_wq, &rproc->crash_handler);
>>>>   }
>>>>   EXPORT_SYMBOL(rproc_report_crash);
>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>> index fe8978eb69f1..f3c0e0103e81 100644
>>>> --- a/include/linux/remoteproc.h
>>>> +++ b/include/linux/remoteproc.h
>>>> @@ -596,6 +596,8 @@ struct rproc {
>>>>    * @stop: stop function, called before the rproc is stopped; the 
>>>> @crashed
>>>>    *        parameter indicates if this originates from a recovery
>>>>    * @unprepare: unprepare function, called after the rproc has been 
>>>> stopped
>>>> + * @notify_crash: notify_crash function, called in atomic context 
>>>> to notify
>>>> + *          rproc has crashed and recovery is about to start
>>>>    */
>>>>   struct rproc_subdev {
>>>>       struct list_head node;
>>>> @@ -604,6 +606,7 @@ struct rproc_subdev {
>>>>       int (*start)(struct rproc_subdev *subdev);
>>>>       void (*stop)(struct rproc_subdev *subdev, bool crashed);
>>>>       void (*unprepare)(struct rproc_subdev *subdev);
>>>> +    void (*notify_crash)(struct rproc_subdev *subdev);
>>>>   };
>>>>   /* we currently support only two vrings per rvdev */
>>>> diff --git a/include/linux/remoteproc/qcom_rproc.h 
>>>> b/include/linux/remoteproc/qcom_rproc.h
>>>> index 82b211518136..f3d06900f297 100644
>>>> --- a/include/linux/remoteproc/qcom_rproc.h
>>>> +++ b/include/linux/remoteproc/qcom_rproc.h
>>>> @@ -11,12 +11,14 @@ struct notifier_block;
>>>>    * @QCOM_SSR_AFTER_POWERUP:    Remoteproc is running (start stage)
>>>>    * @QCOM_SSR_BEFORE_SHUTDOWN:    Remoteproc crashed or shutting 
>>>> down (stop stage)
>>>>    * @QCOM_SSR_AFTER_SHUTDOWN:    Remoteproc is down (unprepare stage)
>>>> + * @QCOM_SSR_NOTIFY_CRASH:    Remoteproc crashed
>>>>    */
>>>>   enum qcom_ssr_notify_type {
>>>>       QCOM_SSR_BEFORE_POWERUP,
>>>>       QCOM_SSR_AFTER_POWERUP,
>>>>       QCOM_SSR_BEFORE_SHUTDOWN,
>>>>       QCOM_SSR_AFTER_SHUTDOWN,
>>>> +    QCOM_SSR_NOTIFY_CRASH,
>>>>   };
>>>>   struct qcom_ssr_notify_data {
>>>> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
>>>>   void *qcom_register_ssr_notifier(const char *name, struct 
>>>> notifier_block *nb);
>>>>   int qcom_unregister_ssr_notifier(void *notify, struct 
>>>> notifier_block *nb);
>>>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>>>> +                    struct notifier_block *nb);
>>>> +int qcom_unregister_ssr_atomic_notifier(void *notify,
>>>> +                    struct notifier_block *nb);
>>>>   #else
>>>>   static inline void *qcom_register_ssr_notifier(const char *name,
>>>> @@ -43,6 +49,17 @@ static inline int 
>>>> qcom_unregister_ssr_notifier(void *notify,
>>>>       return 0;
>>>>   }
>>>> +static inline void *qcom_register_ssr_atomic_notifier(const char 
>>>> *name,
>>>> +                              struct notifier_block *nb)
>>>> +{
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
>>>> +                              struct notifier_block *nb)
>>>> +{
>>>> +    return 0;
>>>> +}
>>>>   #endif
>>>>   #endif
>>>
Vignesh Viswanathan July 11, 2023, 8 a.m. UTC | #5
On 6/15/2023 4:10 PM, Vignesh Viswanathan wrote:
> 
> 
> On 5/29/2023 9:27 AM, Vignesh Viswanathan wrote:
>> Gentle Reminder.
>>
>> On 5/22/2023 3:03 PM, Mukesh Ojha wrote:
>>>
>>>
>>> On 5/3/2023 4:56 PM, Mukesh Ojha wrote:
>>>>
>>>>
>>>> On 5/3/2023 11:51 AM, Vignesh Viswanathan wrote:
>>>>> Currently the SSR subdevice notifies the client driver on crash of the
>>>>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>>>>> However the client driver might be interested to know that the device
>>>>> has crashed immediately to pause any further transactions with the
>>>>> rproc. This calls for an event to be sent to the driver in the IRQ
>>>>> context as soon as the rproc crashes.
>>>>>
>>>>> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
>>>>> crashed to the client driver.
>>>>>
>>>>> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to 
>>>>> crash
>>>>> and ensuring the registered notifier function receives the 
>>>>> notification
>>>>> in IRQ context.
>>>>
>>>> This was one of valid use case we encounter in android, We have some 
>>>> other way of doing the same thing without core kernel change with
>>>> something called early notifiers.
>>>>
>>>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
>>>>
>>>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744
>>>>
>>>> But good to address this if possible.
>>>
>>> Ack the idea of early notifier;
>>> But here, atomic does not guarantees it to be atomic.
>>>
>>> Acked-by: Mukesh Ojha <quic_mojha@quicinc.com>
>>>
>>> -- Mukesh
>>>
> Gentle Reminder!
> 
> Thanks,
> Vignesh
> 

Gentle reminder for review!

Thanks,
Vignesh

>>>
>>>>
>>>> -Mukesh
>>>>>
>>>>> Signed-off-by: Vignesh Viswanathan <quic_viswanat@quicinc.com>
>>>>> ---
>>>>>   drivers/remoteproc/qcom_common.c     | 60 
>>>>> +++++++++++++++++++++++++++
>>>>>   drivers/remoteproc/remoteproc_core.c  | 12 ++++++
>>>>>   include/linux/remoteproc.h            |  3 ++
>>>>>   include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
>>>>>   4 files changed, 92 insertions(+)
>>>>>
>>>>> diff --git a/drivers/remoteproc/qcom_common.c 
>>>>> b/drivers/remoteproc/qcom_common.c
>>>>> index a0d4238492e9..76542229aeb6 100644
>>>>> --- a/drivers/remoteproc/qcom_common.c
>>>>> +++ b/drivers/remoteproc/qcom_common.c
>>>>> @@ -84,6 +84,7 @@ struct minidump_global_toc {
>>>>>   struct qcom_ssr_subsystem {
>>>>>       const char *name;
>>>>>       struct srcu_notifier_head notifier_list;
>>>>> +    struct atomic_notifier_head atomic_notifier_list;
>>>>>       struct list_head list;
>>>>>   };
>>>>> @@ -366,6 +367,7 @@ static struct qcom_ssr_subsystem 
>>>>> *qcom_ssr_get_subsys(const char *name)
>>>>>       }
>>>>>       info->name = kstrdup_const(name, GFP_KERNEL);
>>>>>       srcu_init_notifier_head(&info->notifier_list);
>>>>> +    ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>>>>>       /* Add to global notification list */
>>>>>       list_add_tail(&info->list, &qcom_ssr_subsystem_list);
>>>>> @@ -417,6 +419,51 @@ int qcom_unregister_ssr_notifier(void *notify, 
>>>>> struct notifier_block *nb)
>>>>>   }
>>>>>   EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>>>>> +/**
>>>>> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic 
>>>>> notification
>>>>> + *                     handler
>>>>> + * @name:    Subsystem's SSR name
>>>>> + * @nb:    notifier_block to be invoked upon subsystem's state change
>>>>> + *
>>>>> + * This registers the @nb notifier block as part the atomic notifier
>>>>> + * chain for a remoteproc associated with @name. The notifier 
>>>>> block's callback
>>>>> + * will be invoked when the remote processor crashes in atomic 
>>>>> context before
>>>>> + * the recovery process is queued.
>>>>> + *
>>>>> + * Return: a subsystem cookie on success, ERR_PTR on failure.
>>>>> + */
>>>>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>>>>> +                    struct notifier_block *nb)
>>>>> +{
>>>>> +    struct qcom_ssr_subsystem *info;
>>>>> +
>>>>> +    info = qcom_ssr_get_subsys(name);
>>>>> +    if (IS_ERR(info))
>>>>> +        return info;
>>>>> +
>>>>> +    atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
>>>>> +
>>>>> +    return &info->atomic_notifier_list;
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
>>>>> +
>>>>> +/**
>>>>> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic 
>>>>> notification
>>>>> + *                       handler
>>>>> + * @notify:    subsystem cookie returned from 
>>>>> qcom_register_ssr_notifier
>>>>> + * @nb:        notifier_block to unregister
>>>>> + *
>>>>> + * This function will unregister the notifier from the atomic 
>>>>> notifier
>>>>> + * chain.
>>>>> + *
>>>>> + * Return: 0 on success, %ENOENT otherwise.
>>>>> + */
>>>>> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct 
>>>>> notifier_block *nb)
>>>>> +{
>>>>> +    return atomic_notifier_chain_unregister(notify, nb);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
>>>>> +
>>>>>   static int ssr_notify_prepare(struct rproc_subdev *subdev)
>>>>>   {
>>>>>       struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>>>>> @@ -467,6 +514,18 @@ static void ssr_notify_unprepare(struct 
>>>>> rproc_subdev *subdev)
>>>>>                    QCOM_SSR_AFTER_SHUTDOWN, &data);
>>>>>   }
>>>>> +static void ssr_notify_crash(struct rproc_subdev *subdev)
>>>>> +{
>>>>> +    struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>>>>> +    struct qcom_ssr_notify_data data = {
>>>>> +        .name = ssr->info->name,
>>>>> +        .crashed = true,
>>>>> +    };
>>>>> +
>>>>> +    atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
>>>>> +                   QCOM_SSR_NOTIFY_CRASH, &data);
>>>>> +}
>>>>> +
>>>>>   /**
>>>>>    * qcom_add_ssr_subdev() - register subdevice as restart 
>>>>> notification source
>>>>>    * @rproc:    rproc handle
>>>>> @@ -493,6 +552,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, 
>>>>> struct qcom_rproc_ssr *ssr,
>>>>>       ssr->subdev.start = ssr_notify_start;
>>>>>       ssr->subdev.stop = ssr_notify_stop;
>>>>>       ssr->subdev.unprepare = ssr_notify_unprepare;
>>>>> +    ssr->subdev.notify_crash = ssr_notify_crash;
>>>>>       rproc_add_subdev(rproc, &ssr->subdev);
>>>>>   }
>>>>> diff --git a/drivers/remoteproc/remoteproc_core.c 
>>>>> b/drivers/remoteproc/remoteproc_core.c
>>>>> index 695cce218e8c..3de0ece158ea 100644
>>>>> --- a/drivers/remoteproc/remoteproc_core.c
>>>>> +++ b/drivers/remoteproc/remoteproc_core.c
>>>>> @@ -1139,6 +1139,16 @@ static void 
>>>>> rproc_unprepare_subdevices(struct rproc *rproc)
>>>>>       }
>>>>>   }
>>>>> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
>>>>> +{
>>>>> +    struct rproc_subdev *subdev;
>>>>> +
>>>>> +    list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
>>>>> +        if (subdev->notify_crash)
>>>>> +            subdev->notify_crash(subdev);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>>   /**
>>>>>    * rproc_alloc_registered_carveouts() - allocate all carveouts 
>>>>> registered
>>>>>    * in the list
>>>>> @@ -2687,6 +2697,8 @@ void rproc_report_crash(struct rproc *rproc, 
>>>>> enum rproc_crash_type type)
>>>>>       dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>>>>>           rproc->name, rproc_crash_to_string(type));
>>>>> +    rproc_notify_crash_subdevices(rproc);
>>>>> +
>>>>>       queue_work(rproc_recovery_wq, &rproc->crash_handler);
>>>>>   }
>>>>>   EXPORT_SYMBOL(rproc_report_crash);
>>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>>> index fe8978eb69f1..f3c0e0103e81 100644
>>>>> --- a/include/linux/remoteproc.h
>>>>> +++ b/include/linux/remoteproc.h
>>>>> @@ -596,6 +596,8 @@ struct rproc {
>>>>>    * @stop: stop function, called before the rproc is stopped; the 
>>>>> @crashed
>>>>>    *        parameter indicates if this originates from a recovery
>>>>>    * @unprepare: unprepare function, called after the rprochas 
>>>>> been stopped
>>>>> + * @notify_crash: notify_crash function, called in atomic context 
>>>>> to notify
>>>>> + *          rproc has crashed and recovery is about to start
>>>>>    */
>>>>>   struct rproc_subdev {
>>>>>       struct list_head node;
>>>>> @@ -604,6 +606,7 @@ struct rproc_subdev {
>>>>>       int (*start)(struct rproc_subdev *subdev);
>>>>>       void (*stop)(struct rproc_subdev *subdev, bool crashed);
>>>>>       void (*unprepare)(struct rproc_subdev *subdev);
>>>>> +    void (*notify_crash)(struct rproc_subdev *subdev);
>>>>>   };
>>>>>   /* we currently support only two vrings per rvdev */
>>>>> diff --git a/include/linux/remoteproc/qcom_rproc.h 
>>>>> b/include/linux/remoteproc/qcom_rproc.h
>>>>> index 82b211518136..f3d06900f297 100644
>>>>> --- a/include/linux/remoteproc/qcom_rproc.h
>>>>> +++ b/include/linux/remoteproc/qcom_rproc.h
>>>>> @@ -11,12 +11,14 @@ struct notifier_block;
>>>>>    * @QCOM_SSR_AFTER_POWERUP:    Remoteproc is running (start stage)
>>>>>    * @QCOM_SSR_BEFORE_SHUTDOWN:    Remoteproc crashed or shutting 
>>>>> down (stop stage)
>>>>>    * @QCOM_SSR_AFTER_SHUTDOWN:    Remoteprocis down (unprepare stage)
>>>>> + * @QCOM_SSR_NOTIFY_CRASH:    Remoteproc crashed
>>>>>    */
>>>>>   enum qcom_ssr_notify_type {
>>>>>       QCOM_SSR_BEFORE_POWERUP,
>>>>>       QCOM_SSR_AFTER_POWERUP,
>>>>>       QCOM_SSR_BEFORE_SHUTDOWN,
>>>>>       QCOM_SSR_AFTER_SHUTDOWN,
>>>>> +    QCOM_SSR_NOTIFY_CRASH,
>>>>>   };
>>>>>   struct qcom_ssr_notify_data {
>>>>> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
>>>>>   void *qcom_register_ssr_notifier(const char *name, struct 
>>>>> notifier_block *nb);
>>>>>   int qcom_unregister_ssr_notifier(void *notify, struct 
>>>>> notifier_block *nb);
>>>>> +void *qcom_register_ssr_atomic_notifier(const char *name,
>>>>> +                    struct notifier_block *nb);
>>>>> +int qcom_unregister_ssr_atomic_notifier(void *notify,
>>>>> +                    struct notifier_block *nb);
>>>>>   #else
>>>>>   static inline void *qcom_register_ssr_notifier(const char *name,
>>>>> @@ -43,6 +49,17 @@ static inline int 
>>>>> qcom_unregister_ssr_notifier(void *notify,
>>>>>       return 0;
>>>>>   }
>>>>> +static inline void *qcom_register_ssr_atomic_notifier(const char 
>>>>> *name,
>>>>> +                              struct notifier_block *nb)
>>>>> +{
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
>>>>> +                              struct notifier_block *nb)
>>>>> +{
>>>>> +    return 0;
>>>>> +}
>>>>>   #endif
>>>>>   #endif
>>>>
Bjorn Andersson July 15, 2023, 8:20 p.m. UTC | #6
On Wed, May 03, 2023 at 11:51:46AM +0530, Vignesh Viswanathan wrote:
> Currently the SSR subdevice notifies the client driver on crash of the
> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
> However the client driver might be interested to know that the device
> has crashed immediately to pause any further transactions with the
> rproc. This calls for an event to be sent to the driver in the IRQ
> context as soon as the rproc crashes.
> 

Please make your argumentation more concrete, I can only guess what
client driver you're referring to.

You can do this either by spelling out which actual problem you're
solving, or better yet, include some patches in the series that actually
uses this interface.

Regards,
Bjorn
Vignesh Viswanathan Sept. 4, 2023, 4:53 p.m. UTC | #7
On 7/16/2023 1:50 AM, Bjorn Andersson wrote:
> On Wed, May 03, 2023 at 11:51:46AM +0530, Vignesh Viswanathan wrote:
>> Currently the SSR subdevice notifies the client driver on crash of the
>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>> However the client driver might be interested to know that the device
>> has crashed immediately to pause any further transactions with the
>> rproc. This calls for an event to be sent to the driver in the IRQ
>> context as soon as the rproc crashes.
>>
> 
> Please make your argumentation more concrete, I can only guess what
> client driver you're referring to.
> 
> You can do this either by spelling out which actual problem you're
> solving, or better yet, include some patches in the series that actually
> uses this interface.
> 

Hi Bjorn,

Apologies for the delay in response.

The client driver in my scenario is a Wi-Fi driver which is continuously
queuing data to the remoteproc and needs to know if remoteproc crashes
as soon as possible to stop queuing further data and also dump some 
debug statistics on the driver side that could potentially help in debug
of why the remoteproc crashed.

Also in the case with upcoming Wi-Fi 7 targets with multi-link 
operation, the driver might need to know that the remoteproc has crashed
instantly to handle some multi-link specific handling.

The ath11k/ath12k WLAN drivers today partially have support for handling
such FATAL notification but it has not been upstreamed yet.

Reference patch: 
https://git.codelinaro.org/clo/qsdk/oss/system/feeds/wlan-open/-/blob/win.wlan_host_opensource.1.0/mac80211/patches/031-ath11k-print-stats-on-crash.patch 
-- event SUBSYS_PREPARE_FOR_FATAL_SHUTDOWN.

Also, Mukesh mentioned earlier that in some MSM targets with PCIe where 
latency cannot be tolerated, a similar downstream patch adds support for 
"early notifier". If this patch is accepted, the early notifier can also 
be replaced to use the same NOTIFY_FATAL event from SSR Subdevice

https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744

Thanks,
Vignesh

> Regards,
> Bjorn
Vignesh Viswanathan Nov. 24, 2023, 6:54 p.m. UTC | #8
On 9/4/2023 10:23 PM, Vignesh Viswanathan wrote:
> 
> 
> On 7/16/2023 1:50 AM, Bjorn Andersson wrote:
>> On Wed, May 03, 2023 at 11:51:46AM +0530, Vignesh Viswanathan wrote:
>>> Currently the SSR subdevice notifies the client driver on crash of the
>>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>>> However the client driver might be interested to know that the device
>>> has crashed immediately to pause any further transactions with the
>>> rproc. This calls for an event to be sent to the driver in the IRQ
>>> context as soon as the rproc crashes.
>>>
>>
>> Please make your argumentation more concrete, I can only guess what
>> client driver you're referring to.
>>
>> You can do this either by spelling out which actual problem you're
>> solving, or better yet, include some patches in the series that actually
>> uses this interface.
>>
> 
> Hi Bjorn,
> 
> Apologies for the delay in response.
> 
> The client driver in my scenario is a Wi-Fi driver which is continuously
> queuing data to the remoteproc and needs to know if remoteproc crashes
> as soon as possible to stop queuing further data and also dump some 
> debug statistics on the driver side that could potentially help in debug
> of why the remoteproc crashed.
> 
> Also in the case with upcoming Wi-Fi 7 targets with multi-link 
> operation, the driver might need to know that the remoteproc has crashed
> instantly to handle some multi-link specific handling.
> 
> The ath11k/ath12k WLAN drivers today partially have support for handling
> such FATAL notification but it has not been upstreamed yet.
> 
> Reference patch: 
> https://git.codelinaro.org/clo/qsdk/oss/system/feeds/wlan-open/-/blob/win.wlan_host_opensource.1.0/mac80211/patches/031-ath11k-print-stats-on-crash.patch -- event SUBSYS_PREPARE_FOR_FATAL_SHUTDOWN.
> 
> Also, Mukesh mentioned earlier that in some MSM targets with PCIe where 
> latency cannot be tolerated, a similar downstream patch adds support for 
> "early notifier". If this patch is accepted, the early notifier can also 
> be replaced to use the same NOTIFY_FATAL event from SSR Subdevice
> 
> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744

Hi Bjorn,

Gentle reminder for this patch.

Thanks,
Vignesh
> 
> Thanks,
> Vignesh
> 
>> Regards,
>> Bjorn
Vignesh Viswanathan Feb. 7, 2024, 4:21 p.m. UTC | #9
On 11/25/2023 12:24 AM, Vignesh Viswanathan wrote:
> 
> 
> On 9/4/2023 10:23 PM, Vignesh Viswanathan wrote:
>>
>>
>> On 7/16/2023 1:50 AM, Bjorn Andersson wrote:
>>> On Wed, May 03, 2023 at 11:51:46AM +0530, Vignesh Viswanathan wrote:
>>>> Currently the SSR subdevice notifies the client driver on crash of the
>>>> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
>>>> However the client driver might be interested to know that the device
>>>> has crashed immediately to pause any further transactions with the
>>>> rproc. This calls for an event to be sent to the driver in the IRQ
>>>> context as soon as the rproc crashes.
>>>>
>>>
>>> Please make your argumentation more concrete, I can only guess what
>>> client driver you're referring to.
>>>
>>> You can do this either by spelling out which actual problem you're
>>> solving, or better yet, include some patches in the series that actually
>>> uses this interface.
>>>
>>
>> Hi Bjorn,
>>
>> Apologies for the delay in response.
>>
>> The client driver in my scenario is a Wi-Fi driver which is continuously
>> queuing data to the remoteproc and needs to know if remoteproc crashes
>> as soon as possible to stop queuing further data and also dump some debugstatistics on the driver side that could potentially help in debug
>> of why the remoteproc crashed.
>>
>> Also in the case with upcoming Wi-Fi 7 targets with multi-link operation, the driver might need to know that the remoteproc has crashed
>> instantly to handle some multi-link specific handling.
>>
>> The ath11k/ath12k WLAN drivers today partially have support for handling
>> such FATAL notification but it has not been upstreamed yet.
>>
>> Reference patch: https://git.codelinaro.org/clo/qsdk/oss/system/feeds/wlan-open/-/blob/win.wlan_host_opensource.1.0/mac80211/patches/031-ath11k-print-stats-on-crash.patch -- event SUBSYS_PREPARE_FOR_FATAL_SHUTDOWN.
>>
>> Also, Mukesh mentioned earlier that in some MSM targets with PCIe where latency cannot be tolerated, a similar downstream patch adds support for "early notifier". If this patch is accepted, the early notifier can also be replaced to use the same NOTIFY_FATAL event from SSR Subdevice
>>
>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/7583d24de337aa1bf7c375a7da706af9b995b9a1#a840754ebb0e24e88adbf48177e1abd0830b72d2
>> https://git.codelinaro.org/clo/la/kernel/msm-5.15/-/commit/257de41c63a5a51a081cc7887cdaa4a46e4d1744
> 
> Hi Bjorn,
> 
> Gentle reminder for this patch.
> 
> Thanks,
> Vignesh

Hi Bjorn,

Could you please help review this patch and let me know if any comments.

Thanks,
Vignesh
>>
>> Thanks,
>> Vignesh
>>
>>> Regards,
>>> Bjorn
diff mbox series

Patch

diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index a0d4238492e9..76542229aeb6 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -84,6 +84,7 @@  struct minidump_global_toc {
 struct qcom_ssr_subsystem {
 	const char *name;
 	struct srcu_notifier_head notifier_list;
+	struct atomic_notifier_head atomic_notifier_list;
 	struct list_head list;
 };
 
@@ -366,6 +367,7 @@  static struct qcom_ssr_subsystem *qcom_ssr_get_subsys(const char *name)
 	}
 	info->name = kstrdup_const(name, GFP_KERNEL);
 	srcu_init_notifier_head(&info->notifier_list);
+	ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
 
 	/* Add to global notification list */
 	list_add_tail(&info->list, &qcom_ssr_subsystem_list);
@@ -417,6 +419,51 @@  int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
 
+/**
+ * qcom_register_ssr_atomic_notifier() - register SSR Atomic notification
+ *					 handler
+ * @name:	Subsystem's SSR name
+ * @nb:	notifier_block to be invoked upon subsystem's state change
+ *
+ * This registers the @nb notifier block as part the atomic notifier
+ * chain for a remoteproc associated with @name. The notifier block's callback
+ * will be invoked when the remote processor crashes in atomic context before
+ * the recovery process is queued.
+ *
+ * Return: a subsystem cookie on success, ERR_PTR on failure.
+ */
+void *qcom_register_ssr_atomic_notifier(const char *name,
+					struct notifier_block *nb)
+{
+	struct qcom_ssr_subsystem *info;
+
+	info = qcom_ssr_get_subsys(name);
+	if (IS_ERR(info))
+		return info;
+
+	atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
+
+	return &info->atomic_notifier_list;
+}
+EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
+
+/**
+ * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic notification
+ *					   handler
+ * @notify:	subsystem cookie returned from qcom_register_ssr_notifier
+ * @nb:		notifier_block to unregister
+ *
+ * This function will unregister the notifier from the atomic notifier
+ * chain.
+ *
+ * Return: 0 on success, %ENOENT otherwise.
+ */
+int qcom_unregister_ssr_atomic_notifier(void *notify, struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(notify, nb);
+}
+EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
+
 static int ssr_notify_prepare(struct rproc_subdev *subdev)
 {
 	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
@@ -467,6 +514,18 @@  static void ssr_notify_unprepare(struct rproc_subdev *subdev)
 				 QCOM_SSR_AFTER_SHUTDOWN, &data);
 }
 
+static void ssr_notify_crash(struct rproc_subdev *subdev)
+{
+	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
+	struct qcom_ssr_notify_data data = {
+		.name = ssr->info->name,
+		.crashed = true,
+	};
+
+	atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
+				   QCOM_SSR_NOTIFY_CRASH, &data);
+}
+
 /**
  * qcom_add_ssr_subdev() - register subdevice as restart notification source
  * @rproc:	rproc handle
@@ -493,6 +552,7 @@  void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
 	ssr->subdev.start = ssr_notify_start;
 	ssr->subdev.stop = ssr_notify_stop;
 	ssr->subdev.unprepare = ssr_notify_unprepare;
+	ssr->subdev.notify_crash = ssr_notify_crash;
 
 	rproc_add_subdev(rproc, &ssr->subdev);
 }
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 695cce218e8c..3de0ece158ea 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1139,6 +1139,16 @@  static void rproc_unprepare_subdevices(struct rproc *rproc)
 	}
 }
 
+static void rproc_notify_crash_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+
+	list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
+		if (subdev->notify_crash)
+			subdev->notify_crash(subdev);
+	}
+}
+
 /**
  * rproc_alloc_registered_carveouts() - allocate all carveouts registered
  * in the list
@@ -2687,6 +2697,8 @@  void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
 	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
 		rproc->name, rproc_crash_to_string(type));
 
+	rproc_notify_crash_subdevices(rproc);
+
 	queue_work(rproc_recovery_wq, &rproc->crash_handler);
 }
 EXPORT_SYMBOL(rproc_report_crash);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index fe8978eb69f1..f3c0e0103e81 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -596,6 +596,8 @@  struct rproc {
  * @stop: stop function, called before the rproc is stopped; the @crashed
  *	    parameter indicates if this originates from a recovery
  * @unprepare: unprepare function, called after the rproc has been stopped
+ * @notify_crash: notify_crash function, called in atomic context to notify
+ *		  rproc has crashed and recovery is about to start
  */
 struct rproc_subdev {
 	struct list_head node;
@@ -604,6 +606,7 @@  struct rproc_subdev {
 	int (*start)(struct rproc_subdev *subdev);
 	void (*stop)(struct rproc_subdev *subdev, bool crashed);
 	void (*unprepare)(struct rproc_subdev *subdev);
+	void (*notify_crash)(struct rproc_subdev *subdev);
 };
 
 /* we currently support only two vrings per rvdev */
diff --git a/include/linux/remoteproc/qcom_rproc.h b/include/linux/remoteproc/qcom_rproc.h
index 82b211518136..f3d06900f297 100644
--- a/include/linux/remoteproc/qcom_rproc.h
+++ b/include/linux/remoteproc/qcom_rproc.h
@@ -11,12 +11,14 @@  struct notifier_block;
  * @QCOM_SSR_AFTER_POWERUP:	Remoteproc is running (start stage)
  * @QCOM_SSR_BEFORE_SHUTDOWN:	Remoteproc crashed or shutting down (stop stage)
  * @QCOM_SSR_AFTER_SHUTDOWN:	Remoteproc is down (unprepare stage)
+ * @QCOM_SSR_NOTIFY_CRASH:	Remoteproc crashed
  */
 enum qcom_ssr_notify_type {
 	QCOM_SSR_BEFORE_POWERUP,
 	QCOM_SSR_AFTER_POWERUP,
 	QCOM_SSR_BEFORE_SHUTDOWN,
 	QCOM_SSR_AFTER_SHUTDOWN,
+	QCOM_SSR_NOTIFY_CRASH,
 };
 
 struct qcom_ssr_notify_data {
@@ -29,6 +31,10 @@  struct qcom_ssr_notify_data {
 void *qcom_register_ssr_notifier(const char *name, struct notifier_block *nb);
 int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb);
 
+void *qcom_register_ssr_atomic_notifier(const char *name,
+					struct notifier_block *nb);
+int qcom_unregister_ssr_atomic_notifier(void *notify,
+					struct notifier_block *nb);
 #else
 
 static inline void *qcom_register_ssr_notifier(const char *name,
@@ -43,6 +49,17 @@  static inline int qcom_unregister_ssr_notifier(void *notify,
 	return 0;
 }
 
+static inline void *qcom_register_ssr_atomic_notifier(const char *name,
+						      struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
+						      struct notifier_block *nb)
+{
+	return 0;
+}
 #endif
 
 #endif