diff mbox

thermal: omap-thermal: Add notify function to thermal_zone_device_ops

Message ID 1455599145-5656-1-git-send-email-j-keerthy@ti.com (mailing list archive)
State Changes Requested
Delegated to: Eduardo Valentin
Headers show

Commit Message

J, KEERTHY Feb. 16, 2016, 5:05 a.m. UTC
The notify function is called when a temperature threshold is
crossed. When we get notified for a critical trip point, schedule
an emergency shutdown function as a backup in case orderly_poweroff
fails.

orderly_poweroff is triggered when a graceful shutdown
of the system is desired. It may be used in many critical states of the
kernel, such as when a subsystem detects a critical temperature
condition. However, under certain conditions during the system boot-up
sequence, such as in the middle of driver probes being initiated,
userspace will be unable to power off the system in a clean manner,
leaving the system in a critical state. In cases like these,
/sbin/poweroff will return success (having forked off to attempt
powering off the system). However, the system overall will fail to
power off completely (since other modules will still be probed), and the
system remains functional but with no userspace (since that will have
shut itself off).

There is no clean way of detecting such a failure of userspace to
power off the system. In such scenarios, it is necessary for a backup
delayed work item to be able to force a shutdown of the system when the
orderly shutdown has not succeeded after a configurable time period.

Signed-off-by: Keerthy <j-keerthy@ti.com>
---

The previous discussion:

https://lkml.org/lkml/2016/1/28/989

Making use of the notify ops to schedule a backup thermal shutdown.

 drivers/thermal/ti-soc-thermal/Kconfig             | 13 +++++++
 drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 43 ++++++++++++++++++++++
 2 files changed, 56 insertions(+)

Comments

Keerthy March 7, 2016, 7:31 a.m. UTC | #1
On Tuesday 16 February 2016 10:35 AM, Keerthy wrote:
> notify function is used to notify when some temperature thresholds
> are crossed. In case we get notified for a critical trip point then
> schedule an emergency shutdown function to backup orderly_poweroff
> failures.
>
> orderly_poweroff is triggered when a graceful shutdown
> of system is desired. This may be used in many critical states of the
> kernel such as when subsystems detects conditions such as critical
> temperature conditions. However, in certain conditions in system
> boot up sequences like those in the middle of driver probes being
> initiated, userspace will be unable to power off the system in a clean
> manner and leaves the system in a critical state. In cases like these,
> the /sbin/poweroff will return success (having forked off to attempt
> powering off the system. However, the system overall will fail to
> completely poweroff (since other modules will be probed) and the system
> is still functional with no userspace (since that would have shut itself
> off).
>
> However, there is no clean way of detecting such failure of userspace
> powering off the system. In such scenarios, it is necessary for a backup
> workqueue to be able to force a shutdown of the system when orderly
> shutdown is not successful after a configurable time period.
>

Eduardo,

A gentle ping on this.

Regards,
Keerthy
> Signed-off-by: Keerthy <j-keerthy@ti.com>
> ---
>
> The previous discussion:
>
> https://lkml.org/lkml/2016/1/28/989
>
> Making use of the notify ops to schedule a backup thermal shutdown.
>
>   drivers/thermal/ti-soc-thermal/Kconfig             | 13 +++++++
>   drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 43 ++++++++++++++++++++++
>   2 files changed, 56 insertions(+)
>
> diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig
> index ea8283f..b4b9668 100644
> --- a/drivers/thermal/ti-soc-thermal/Kconfig
> +++ b/drivers/thermal/ti-soc-thermal/Kconfig
> @@ -71,3 +71,16 @@ config DRA752_THERMAL
>
>   	  This includes alert interrupts generation and also the TSHUT
>   	  support.
> +
> +config TI_THERMAL_EMERGENCY_POWEROFF_DELAY_MS
> +	int "Emergency poweroff delay in milli-seconds"
> +	depends on TI_SOC_THERMAL
> +	default 0
> +	help
> +	  The number of milliseconds to delay before emergency
> +	  poweroff kicks in. The delay should be carefully profiled
> +	  so as to give adequate time for orderly_poweroff. In case
> +	  of failure of an orderly_poweroff the emergency poweroff
> +	  kicks in after the delay has elapsed and shuts down the system.
> +
> +	  If set to 0 poweroff will happen immediately.
> diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
> index b213a12..d48eb5b1 100644
> --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
> +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
> @@ -31,6 +31,7 @@
>   #include <linux/cpumask.h>
>   #include <linux/cpu_cooling.h>
>   #include <linux/of.h>
> +#include <linux/reboot.h>
>
>   #include "ti-thermal.h"
>   #include "ti-bandgap.h"
> @@ -286,6 +287,47 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
>   	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
>   }
>
> +/**
> + * emergency_poweroff_func - emergency poweroff work after a known delay
> + * @work: work_struct associated with the emergency poweroff function
> + *
> + * This function is called in very critical situations to force
> + * a kernel poweroff after a configurable timeout value.
> + */
> +static void emergency_poweroff_func(struct work_struct *work)
> +{
> +	pr_warn("Attempting kernel_power_off\n");
> +	kernel_power_off();
> +
> +	pr_warn("kernel_power_off has failed! Attempting emergency_restart\n");
> +	emergency_restart();
> +}
> +
> +static DECLARE_DELAYED_WORK(emergency_poweroff_work, emergency_poweroff_func);
> +
> +/**
> + * emergency_poweroff - Trigger an emergency system poweroff
> + *
> + * This may be called from any critical situation to trigger a system shutdown
> + * after a known period of time. By default the delay is 0 millisecond
> + */
> +void ti_thermal_emergency_poweroff(void)
> +{
> +	schedule_delayed_work(&emergency_poweroff_work,
> +	msecs_to_jiffies(CONFIG_TI_THERMAL_EMERGENCY_POWEROFF_DELAY_MS));
> +}
> +
> +static int ti_thermal_notify(struct thermal_zone_device *thermal, int temp,
> +			     enum thermal_trip_type type)
> +{
> +	if (type == THERMAL_TRIP_CRITICAL) {
> +		pr_warn("critical temperature %d reached", temp);
> +		ti_thermal_emergency_poweroff();
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
>   	.get_temp = __ti_thermal_get_temp,
>   	.get_trend = __ti_thermal_get_trend,
> @@ -301,6 +343,7 @@ static struct thermal_zone_device_ops ti_thermal_ops = {
>   	.get_trip_type = ti_thermal_get_trip_type,
>   	.get_trip_temp = ti_thermal_get_trip_temp,
>   	.get_crit_temp = ti_thermal_get_crit_temp,
> +	.notify = ti_thermal_notify,
>   };
>
>   static struct ti_thermal_data
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Keerthy March 7, 2016, 7:33 a.m. UTC | #2
Eduardo,

On Tuesday 16 February 2016 10:35 AM, Keerthy wrote:
> notify function is used to notify when some temperature thresholds
> are crossed. In case we get notified for a critical trip point then
> schedule an emergency shutdown function to backup orderly_poweroff
> failures.
>
> orderly_poweroff is triggered when a graceful shutdown
> of system is desired. This may be used in many critical states of the
> kernel such as when subsystems detects conditions such as critical
> temperature conditions. However, in certain conditions in system
> boot up sequences like those in the middle of driver probes being
> initiated, userspace will be unable to power off the system in a clean
> manner and leaves the system in a critical state. In cases like these,
> the /sbin/poweroff will return success (having forked off to attempt
> powering off the system. However, the system overall will fail to
> completely poweroff (since other modules will be probed) and the system
> is still functional with no userspace (since that would have shut itself
> off).
>
> However, there is no clean way of detecting such failure of userspace
> powering off the system. In such scenarios, it is necessary for a backup
> workqueue to be able to force a shutdown of the system when orderly
> shutdown is not successful after a configurable time period.
>

A gentle ping on this.

> Signed-off-by: Keerthy <j-keerthy@ti.com>
> ---
>
> The previous discussion:
>
> https://lkml.org/lkml/2016/1/28/989
>
> Making use of the notify ops to schedule a backup thermal shutdown.
>
>   drivers/thermal/ti-soc-thermal/Kconfig             | 13 +++++++
>   drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 43 ++++++++++++++++++++++
>   2 files changed, 56 insertions(+)
>
> diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig
> index ea8283f..b4b9668 100644
> --- a/drivers/thermal/ti-soc-thermal/Kconfig
> +++ b/drivers/thermal/ti-soc-thermal/Kconfig
> @@ -71,3 +71,16 @@ config DRA752_THERMAL
>
>   	  This includes alert interrupts generation and also the TSHUT
>   	  support.
> +
> +config TI_THERMAL_EMERGENCY_POWEROFF_DELAY_MS
> +	int "Emergency poweroff delay in milli-seconds"
> +	depends on TI_SOC_THERMAL
> +	default 0
> +	help
> +	  The number of milliseconds to delay before emergency
> +	  poweroff kicks in. The delay should be carefully profiled
> +	  so as to give adequate time for orderly_poweroff. In case
> +	  of failure of an orderly_poweroff the emergency poweroff
> +	  kicks in after the delay has elapsed and shuts down the system.
> +
> +	  If set to 0 poweroff will happen immediately.
> diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
> index b213a12..d48eb5b1 100644
> --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
> +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
> @@ -31,6 +31,7 @@
>   #include <linux/cpumask.h>
>   #include <linux/cpu_cooling.h>
>   #include <linux/of.h>
> +#include <linux/reboot.h>
>
>   #include "ti-thermal.h"
>   #include "ti-bandgap.h"
> @@ -286,6 +287,47 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
>   	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
>   }
>
> +/**
> + * emergency_poweroff_func - emergency poweroff work after a known delay
> + * @work: work_struct associated with the emergency poweroff function
> + *
> + * This function is called in very critical situations to force
> + * a kernel poweroff after a configurable timeout value.
> + */
> +static void emergency_poweroff_func(struct work_struct *work)
> +{
> +	pr_warn("Attempting kernel_power_off\n");
> +	kernel_power_off();
> +
> +	pr_warn("kernel_power_off has failed! Attempting emergency_restart\n");
> +	emergency_restart();
> +}
> +
> +static DECLARE_DELAYED_WORK(emergency_poweroff_work, emergency_poweroff_func);
> +
> +/**
> + * emergency_poweroff - Trigger an emergency system poweroff
> + *
> + * This may be called from any critical situation to trigger a system shutdown
> + * after a known period of time. By default the delay is 0 millisecond
> + */
> +void ti_thermal_emergency_poweroff(void)
> +{
> +	schedule_delayed_work(&emergency_poweroff_work,
> +	msecs_to_jiffies(CONFIG_TI_THERMAL_EMERGENCY_POWEROFF_DELAY_MS));
> +}
> +
> +static int ti_thermal_notify(struct thermal_zone_device *thermal, int temp,
> +			     enum thermal_trip_type type)
> +{
> +	if (type == THERMAL_TRIP_CRITICAL) {
> +		pr_warn("critical temperature %d reached", temp);
> +		ti_thermal_emergency_poweroff();
> +	}
> +
> +	return 0;
> +}
> +
>   static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
>   	.get_temp = __ti_thermal_get_temp,
>   	.get_trend = __ti_thermal_get_trend,
> @@ -301,6 +343,7 @@ static struct thermal_zone_device_ops ti_thermal_ops = {
>   	.get_trip_type = ti_thermal_get_trip_type,
>   	.get_trip_temp = ti_thermal_get_trip_temp,
>   	.get_crit_temp = ti_thermal_get_crit_temp,
> +	.notify = ti_thermal_notify,
>   };
>
>   static struct ti_thermal_data
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Valentin March 8, 2016, 8:48 p.m. UTC | #3
On Tue, Feb 16, 2016 at 10:35:45AM +0530, Keerthy wrote:
> notify function is used to notify when some temperature thresholds
> are crossed. In case we get notified for a critical trip point then
> schedule an emergency shutdown function to backup orderly_poweroff
> failures.
> 
> orderly_poweroff is triggered when a graceful shutdown
> of system is desired. This may be used in many critical states of the
> kernel such as when subsystems detects conditions such as critical
> temperature conditions. However, in certain conditions in system
> boot up sequences like those in the middle of driver probes being
> initiated, userspace will be unable to power off the system in a clean
> manner and leaves the system in a critical state. In cases like these,
> the /sbin/poweroff will return success (having forked off to attempt
> powering off the system. However, the system overall will fail to
> completely poweroff (since other modules will be probed) and the system
> is still functional with no userspace (since that would have shut itself
> off).
> 
> However, there is no clean way of detecting such failure of userspace
> powering off the system. In such scenarios, it is necessary for a backup
> workqueue to be able to force a shutdown of the system when orderly
> shutdown is not successful after a configurable time period.
> 
> Signed-off-by: Keerthy <j-keerthy@ti.com>
> ---
> 
> The previous discussion:
> 
> https://lkml.org/lkml/2016/1/28/989
> 
> Making use of the notify ops to schedule a backup thermal shutdown.

Yeah, this was a bit of a stretch. If we cannot fix orderly poweroff
itself, I would say it is better to fix this for all thermal drivers (as
in the previous version of this). This patch:
1. Abuses a notify function by using it as a poweroff function.
2. Fixes the issue only for a single driver.

I will check your initial version.

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Keerthy May 10, 2016, 12:02 p.m. UTC | #4
On Wednesday 09 March 2016 02:18 AM, Eduardo Valentin wrote:
> On Tue, Feb 16, 2016 at 10:35:45AM +0530, Keerthy wrote:
>> notify function is used to notify when some temperature thresholds
>> are crossed. In case we get notified for a critical trip point then
>> schedule an emergency shutdown function to backup orderly_poweroff
>> failures.
>>
>> orderly_poweroff is triggered when a graceful shutdown
>> of system is desired. This may be used in many critical states of the
>> kernel such as when subsystems detects conditions such as critical
>> temperature conditions. However, in certain conditions in system
>> boot up sequences like those in the middle of driver probes being
>> initiated, userspace will be unable to power off the system in a clean
>> manner and leaves the system in a critical state. In cases like these,
>> the /sbin/poweroff will return success (having forked off to attempt
>> powering off the system. However, the system overall will fail to
>> completely poweroff (since other modules will be probed) and the system
>> is still functional with no userspace (since that would have shut itself
>> off).
>>
>> However, there is no clean way of detecting such failure of userspace
>> powering off the system. In such scenarios, it is necessary for a backup
>> workqueue to be able to force a shutdown of the system when orderly
>> shutdown is not successful after a configurable time period.
>>
>> Signed-off-by: Keerthy <j-keerthy@ti.com>
>> ---
>>
>> The previous discussion:
>>
>> https://lkml.org/lkml/2016/1/28/989
>>
>> Making use of the notify ops to schedule a backup thermal shutdown.
>
> Yeah, this was a bit of a stretching. If we cannot fix orderly power
> off, I would say, better fixing it for all thermal drivers (previous
> version of this). This patch:
> 1. Abuse a notify function to be used as a power off functions
> 2. Fix an issue only for a single driver.
>
> I will check your initial version.

Eduardo,

Any updates on this?

Regards,
Keerthy

>
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig
index ea8283f..b4b9668 100644
--- a/drivers/thermal/ti-soc-thermal/Kconfig
+++ b/drivers/thermal/ti-soc-thermal/Kconfig
@@ -71,3 +71,16 @@  config DRA752_THERMAL
 
 	  This includes alert interrupts generation and also the TSHUT
 	  support.
+
+config TI_THERMAL_EMERGENCY_POWEROFF_DELAY_MS
+	int "Emergency poweroff delay in milli-seconds"
+	depends on TI_SOC_THERMAL
+	default 0
+	help
+	  The number of milliseconds to delay before emergency
+	  poweroff kicks in. The delay should be carefully profiled
+	  so as to give adequate time for orderly_poweroff. In case
+	  of failure of an orderly_poweroff the emergency poweroff
+	  kicks in after the delay has elapsed and shuts down the system.
+
+	  If set to 0 poweroff will happen immediately.
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index b213a12..d48eb5b1 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -31,6 +31,7 @@ 
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
 #include <linux/of.h>
+#include <linux/reboot.h>
 
 #include "ti-thermal.h"
 #include "ti-bandgap.h"
@@ -286,6 +287,47 @@  static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
 	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
 }
 
+/**
+ * emergency_poweroff_func - emergency poweroff work after a known delay
+ * @work: work_struct associated with the emergency poweroff function
+ *
+ * This function is called in very critical situations to force
+ * a kernel poweroff after a configurable timeout value.
+ */
+static void emergency_poweroff_func(struct work_struct *work)
+{
+	pr_warn("Attempting kernel_power_off\n");
+	kernel_power_off();
+
+	pr_warn("kernel_power_off has failed! Attempting emergency_restart\n");
+	emergency_restart();
+}
+
+static DECLARE_DELAYED_WORK(emergency_poweroff_work, emergency_poweroff_func);
+
+/**
+ * ti_thermal_emergency_poweroff - Trigger an emergency system poweroff
+ *
+ * This may be called from any critical situation to trigger a system shutdown
+ * after a known period of time. By default the delay is 0 milliseconds.
+ */
+void ti_thermal_emergency_poweroff(void)
+{
+	schedule_delayed_work(&emergency_poweroff_work,
+	msecs_to_jiffies(CONFIG_TI_THERMAL_EMERGENCY_POWEROFF_DELAY_MS));
+}
+
+static int ti_thermal_notify(struct thermal_zone_device *thermal, int temp,
+			     enum thermal_trip_type type)
+{
+	if (type == THERMAL_TRIP_CRITICAL) {
+		pr_warn("critical temperature %d reached", temp);
+		ti_thermal_emergency_poweroff();
+	}
+
+	return 0;
+}
+
 static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
 	.get_temp = __ti_thermal_get_temp,
 	.get_trend = __ti_thermal_get_trend,
@@ -301,6 +343,7 @@  static struct thermal_zone_device_ops ti_thermal_ops = {
 	.get_trip_type = ti_thermal_get_trip_type,
 	.get_trip_temp = ti_thermal_get_trip_temp,
 	.get_crit_temp = ti_thermal_get_crit_temp,
+	.notify = ti_thermal_notify,
 };
 
 static struct ti_thermal_data