diff mbox series

[v1] thermal: core: Do not fail cdev registration because of invalid initial state

Message ID 4569763.LvFx2qVVIh@kreacher (mailing list archive)
State Superseded, archived
Headers show
Series [v1] thermal: core: Do not fail cdev registration because of invalid initial state | expand

Commit Message

Rafael J. Wysocki June 5, 2024, 7:17 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling
device state to thermal_debug_cdev_add()") causes the ACPI fan driver
to fail probing on some systems which turns out to be due to the _FST
control method returning an invalid value until _FSL is first evaluated
for the given fan.  If this happens, the .get_cur_state() cooling device
callback returns an error and __thermal_cooling_device_register() fails
as uses that callback after commit 31a0fa0019b0.

Arguably, _FST should not return an inavlid value even if it is
evaluated before _FSL, so this may be regarded as a platform firmware
issue, but at the same time it is not a good enough reason for failing
the cooling device registration where the initial cooling device state
is only needed to initialize a thermal debug facility.

Accordingly, modify __thermal_cooling_device_register() to pass a
negative state value to thermal_debug_cdev_add() instead of failing
if the initial .get_cur_state() callback invocation fails and adjust
the thermal debug code to ignore negative cooling device state values.

Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()")
Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
Reported-by: Laura Nao <laura.nao@collabora.com>
Tested-by: Laura Nao <laura.nao@collabora.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.c    |   11 +++++++----
 drivers/thermal/thermal_debugfs.c |    7 ++++++-
 2 files changed, 13 insertions(+), 5 deletions(-)

Comments

srinivas pandruvada June 6, 2024, 3:41 a.m. UTC | #1
On Wed, 2024-06-05 at 21:17 +0200, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass
> cooling
> device state to thermal_debug_cdev_add()") causes the ACPI fan driver
> to fail probing on some systems which turns out to be due to the _FST
> control method returning an invalid value until _FSL is first
> evaluated
> for the given fan.  If this happens, the .get_cur_state() cooling
> device
> callback returns an error and __thermal_cooling_device_register()
> fails
> as uses that callback after commit 31a0fa0019b0.
> 
> Arguably, _FST should not return an inavlid
s/inavlid/invalid

Thanks,
Srinivas

>  value even if it is
> evaluated before _FSL, so this may be regarded as a platform firmware
> issue, but at the same time it is not a good enough reason for
> failing
> the cooling device registration where the initial cooling device
> state
> is only needed to initialize a thermal debug facility.
> 
> Accordingly, modify __thermal_cooling_device_register() to pass a
> negative state value to thermal_debug_cdev_add() instead of failing
> if the initial .get_cur_state() callback invocation fails and adjust
> the thermal debug code to ignore negative cooling device state
> values.
> 
> Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to
> thermal_debug_cdev_add()")
> Closes:
> https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
> Reported-by: Laura Nao <laura.nao@collabora.com>
> Tested-by: Laura Nao <laura.nao@collabora.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/thermal/thermal_core.c    |   11 +++++++----
>  drivers/thermal/thermal_debugfs.c |    7 ++++++-
>  2 files changed, 13 insertions(+), 5 deletions(-)
> 
> Index: linux-pm/drivers/thermal/thermal_core.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.c
> +++ linux-pm/drivers/thermal/thermal_core.c
> @@ -964,7 +964,8 @@ __thermal_cooling_device_register(struct
>  {
>  	struct thermal_cooling_device *cdev;
>  	struct thermal_zone_device *pos = NULL;
> -	unsigned long current_state;
> +	unsigned long val;
> +	int current_state;
>  	int id, ret;
>  
>  	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
> @@ -1002,9 +1003,11 @@ __thermal_cooling_device_register(struct
>  	if (ret)
>  		goto out_cdev_type;
>  
> -	ret = cdev->ops->get_cur_state(cdev, &current_state);
> -	if (ret)
> -		goto out_cdev_type;
> +	ret = cdev->ops->get_cur_state(cdev, &val);
> +	if (!ret && val >= 0 && val <= INT_MAX)
> +		current_state = val;
> +	else
> +		current_state = -1;
>  
>  	thermal_cooling_device_setup_sysfs(cdev);
>  
> Index: linux-pm/drivers/thermal/thermal_debugfs.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_debugfs.c
> +++ linux-pm/drivers/thermal/thermal_debugfs.c
> @@ -421,6 +421,8 @@ void thermal_debug_cdev_state_update(con
>  	cdev_dbg = &thermal_dbg->cdev_dbg;
>  
>  	old_state = cdev_dbg->current_state;
> +	if (old_state < 0)
> +		goto unlock;
>  
>  	/*
>  	 * Get the old state information in the durations list. If
> @@ -463,6 +465,7 @@ void thermal_debug_cdev_state_update(con
>  
>  	cdev_dbg->total++;
>  
> +unlock:
>  	mutex_unlock(&thermal_dbg->lock);
>  }
>  
> @@ -499,7 +502,9 @@ void thermal_debug_cdev_add(struct therm
>  	 * duration will be printed by cdev_dt_seq_show() as
> expected if it
>  	 * runs before the first state transition.
>  	 */
> -	thermal_debugfs_cdev_record_get(thermal_dbg, cdev_dbg-
> >durations, state);
> +	if (state >= 0)
> +		thermal_debugfs_cdev_record_get(thermal_dbg,
> cdev_dbg->durations,
> +						state);
>  
>  	debugfs_create_file("trans_table", 0400, thermal_dbg->d_top,
>  			    thermal_dbg, &tt_fops);
> 
> 
> 
>
Rafael J. Wysocki June 6, 2024, 9:47 a.m. UTC | #2
On Thu, Jun 6, 2024 at 5:41 AM srinivas pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> On Wed, 2024-06-05 at 21:17 +0200, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass
> > cooling
> > device state to thermal_debug_cdev_add()") causes the ACPI fan driver
> > to fail probing on some systems which turns out to be due to the _FST
> > control method returning an invalid value until _FSL is first
> > evaluated
> > for the given fan.  If this happens, the .get_cur_state() cooling
> > device
> > callback returns an error and __thermal_cooling_device_register()
> > fails
> > as uses that callback after commit 31a0fa0019b0.
> >
> > Arguably, _FST should not return an inavlid
> s/inavlid/invalid

Thanks, I'll fix it up when applying the patch.
Daniel Lezcano June 6, 2024, 1:07 p.m. UTC | #3
On 05/06/2024 21:17, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling
> device state to thermal_debug_cdev_add()") causes the ACPI fan driver
> to fail probing on some systems which turns out to be due to the _FST
> control method returning an invalid value until _FSL is first evaluated
> for the given fan.  If this happens, the .get_cur_state() cooling device
> callback returns an error and __thermal_cooling_device_register() fails
> as uses that callback after commit 31a0fa0019b0.
> 
> Arguably, _FST should not return an inavlid value even if it is
> evaluated before _FSL, so this may be regarded as a platform firmware
> issue, but at the same time it is not a good enough reason for failing
> the cooling device registration where the initial cooling device state
> is only needed to initialize a thermal debug facility.
> 
> Accordingly, modify __thermal_cooling_device_register() to pass a
> negative state value to thermal_debug_cdev_add() instead of failing
> if the initial .get_cur_state() callback invocation fails and adjust
> the thermal debug code to ignore negative cooling device state values.
> 
> Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()")
> Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
> Reported-by: Laura Nao <laura.nao@collabora.com>
> Tested-by: Laura Nao <laura.nao@collabora.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

As it is a driver issue, it should be fixed in the driver, not in the 
core code. The resulting code logic in the core is trying to deal with 
bad driver behavior, it does not really seem appropriate.

The core code has been clean up from the high friction it had with the 
legacy ACPI code. It would be nice to continue it this direction.
Rafael J. Wysocki June 6, 2024, 1:42 p.m. UTC | #4
On Thu, Jun 6, 2024 at 3:07 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 05/06/2024 21:17, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling
> > device state to thermal_debug_cdev_add()") causes the ACPI fan driver
> > to fail probing on some systems which turns out to be due to the _FST
> > control method returning an invalid value until _FSL is first evaluated
> > for the given fan.  If this happens, the .get_cur_state() cooling device
> > callback returns an error and __thermal_cooling_device_register() fails
> > as uses that callback after commit 31a0fa0019b0.
> >
> > Arguably, _FST should not return an inavlid value even if it is
> > evaluated before _FSL, so this may be regarded as a platform firmware
> > issue, but at the same time it is not a good enough reason for failing
> > the cooling device registration where the initial cooling device state
> > is only needed to initialize a thermal debug facility.
> >
> > Accordingly, modify __thermal_cooling_device_register() to pass a
> > negative state value to thermal_debug_cdev_add() instead of failing
> > if the initial .get_cur_state() callback invocation fails and adjust
> > the thermal debug code to ignore negative cooling device state values.
> >
> > Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()")
> > Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
> > Reported-by: Laura Nao <laura.nao@collabora.com>
> > Tested-by: Laura Nao <laura.nao@collabora.com>
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> As it is a driver issue, it should be fixed in the driver, not in the
> core code. The resulting code logic in the core is trying to deal with
> bad driver behavior, it does not really seem appropriate.
>
> The core code has been clean up from the high friction it had with the
> legacy ACPI code. It would be nice to continue it this direction.

Essentially, you are saying that .get_cur_state() should not return an
error even if it gets an utterly invalid value from the platform
firmware.

What value should it return then?
Rafael J. Wysocki June 6, 2024, 2:18 p.m. UTC | #5
On Thu, Jun 6, 2024 at 3:42 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Thu, Jun 6, 2024 at 3:07 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> >
> > On 05/06/2024 21:17, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > >
> > > It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling
> > > device state to thermal_debug_cdev_add()") causes the ACPI fan driver
> > > to fail probing on some systems which turns out to be due to the _FST
> > > control method returning an invalid value until _FSL is first evaluated
> > > for the given fan.  If this happens, the .get_cur_state() cooling device
> > > callback returns an error and __thermal_cooling_device_register() fails
> > > as uses that callback after commit 31a0fa0019b0.
> > >
> > > Arguably, _FST should not return an inavlid value even if it is
> > > evaluated before _FSL, so this may be regarded as a platform firmware
> > > issue, but at the same time it is not a good enough reason for failing
> > > the cooling device registration where the initial cooling device state
> > > is only needed to initialize a thermal debug facility.
> > >
> > > Accordingly, modify __thermal_cooling_device_register() to pass a
> > > negative state value to thermal_debug_cdev_add() instead of failing
> > > if the initial .get_cur_state() callback invocation fails and adjust
> > > the thermal debug code to ignore negative cooling device state values.
> > >
> > > Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()")
> > > Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
> > > Reported-by: Laura Nao <laura.nao@collabora.com>
> > > Tested-by: Laura Nao <laura.nao@collabora.com>
> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > As it is a driver issue, it should be fixed in the driver, not in the
> > core code. The resulting code logic in the core is trying to deal with
> > bad driver behavior, it does not really seem appropriate.

Besides, I don't quite agree with dismissing it as a driver issue.  If
a driver cannot determine the cooling device state, it should not be
required to make it up.

Because .get_cur_state() is specifically designed to be able to return
an error, the core should be prepared to deal with errors returned by
it and propagating the error is not always the best choice, like in
this particular case.

> > The core code has been clean up from the high friction it had with the
> > legacy ACPI code. It would be nice to continue it this direction.

This isn't really ACPI specific.  Any driver can return an error from
.get_cur_state() if it has a good enough reason.

> Essentially, you are saying that .get_cur_state() should not return an
> error even if it gets an utterly invalid value from the platform
> firmware.
>
> What value should it return then?
Daniel Lezcano June 6, 2024, 2:50 p.m. UTC | #6
On 06/06/2024 16:18, Rafael J. Wysocki wrote:
> On Thu, Jun 6, 2024 at 3:42 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>>
>> On Thu, Jun 6, 2024 at 3:07 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>>
>>> On 05/06/2024 21:17, Rafael J. Wysocki wrote:
>>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>>
>>>> It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling
>>>> device state to thermal_debug_cdev_add()") causes the ACPI fan driver
>>>> to fail probing on some systems which turns out to be due to the _FST
>>>> control method returning an invalid value until _FSL is first evaluated
>>>> for the given fan.  If this happens, the .get_cur_state() cooling device
>>>> callback returns an error and __thermal_cooling_device_register() fails
>>>> as uses that callback after commit 31a0fa0019b0.
>>>>
>>>> Arguably, _FST should not return an inavlid value even if it is
>>>> evaluated before _FSL, so this may be regarded as a platform firmware
>>>> issue, but at the same time it is not a good enough reason for failing
>>>> the cooling device registration where the initial cooling device state
>>>> is only needed to initialize a thermal debug facility.
>>>>
>>>> Accordingly, modify __thermal_cooling_device_register() to pass a
>>>> negative state value to thermal_debug_cdev_add() instead of failing
>>>> if the initial .get_cur_state() callback invocation fails and adjust
>>>> the thermal debug code to ignore negative cooling device state values.
>>>>
>>>> Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()")
>>>> Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
>>>> Reported-by: Laura Nao <laura.nao@collabora.com>
>>>> Tested-by: Laura Nao <laura.nao@collabora.com>
>>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> As it is a driver issue, it should be fixed in the driver, not in the
>>> core code. The resulting code logic in the core is trying to deal with
>>> bad driver behavior, it does not really seem appropriate.
> 
> Besides, I don't quite agree with dismissing it as a driver issue.  If
> a driver cannot determine the cooling device state, it should not be
> required to make it up.
> 
> Because .get_cur_state() is specifically designed to be able to return
> an error, the core should be prepared to deal with errors returned by
> it and propagating the error is not always the best choice, like in
> this particular case.
> 
>>> The core code has been clean up from the high friction it had with the
>>> legacy ACPI code. It would be nice to continue it this direction.
> 
> This isn't really ACPI specific.  Any driver can return an error from
> .get_cur_state() if it has a good enough reason.

We are talking about registration time, right? If the driver is 
registering too soon, eg. the firmware is not ready, should it fix the 
moment it is registering the cooling device when it is sure the firmware 
completed its initialization ?


>> Essentially, you are saying that .get_cur_state() should not return an
>> error even if it gets an utterly invalid value from the platform
>> firmware.
>>
>> What value should it return then?
Rafael J. Wysocki June 6, 2024, 3:15 p.m. UTC | #7
On Thu, Jun 6, 2024 at 4:50 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 06/06/2024 16:18, Rafael J. Wysocki wrote:
> > On Thu, Jun 6, 2024 at 3:42 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >>
> >> On Thu, Jun 6, 2024 at 3:07 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> >>>
> >>> On 05/06/2024 21:17, Rafael J. Wysocki wrote:
> >>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>>
> >>>> It is reported that commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling
> >>>> device state to thermal_debug_cdev_add()") causes the ACPI fan driver
> >>>> to fail probing on some systems which turns out to be due to the _FST
> >>>> control method returning an invalid value until _FSL is first evaluated
> >>>> for the given fan.  If this happens, the .get_cur_state() cooling device
> >>>> callback returns an error and __thermal_cooling_device_register() fails
> >>>> as uses that callback after commit 31a0fa0019b0.
> >>>>
> >>>> Arguably, _FST should not return an inavlid value even if it is
> >>>> evaluated before _FSL, so this may be regarded as a platform firmware
> >>>> issue, but at the same time it is not a good enough reason for failing
> >>>> the cooling device registration where the initial cooling device state
> >>>> is only needed to initialize a thermal debug facility.
> >>>>
> >>>> Accordingly, modify __thermal_cooling_device_register() to pass a
> >>>> negative state value to thermal_debug_cdev_add() instead of failing
> >>>> if the initial .get_cur_state() callback invocation fails and adjust
> >>>> the thermal debug code to ignore negative cooling device state values.
> >>>>
> >>>> Fixes: 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()")
> >>>> Closes: https://lore.kernel.org/linux-acpi/20240530153727.843378-1-laura.nao@collabora.com
> >>>> Reported-by: Laura Nao <laura.nao@collabora.com>
> >>>> Tested-by: Laura Nao <laura.nao@collabora.com>
> >>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>
> >>> As it is a driver issue, it should be fixed in the driver, not in the
> >>> core code. The resulting code logic in the core is trying to deal with
> >>> bad driver behavior, it does not really seem appropriate.
> >
> > Besides, I don't quite agree with dismissing it as a driver issue.  If
> > a driver cannot determine the cooling device state, it should not be
> > required to make it up.
> >
> > Because .get_cur_state() is specifically designed to be able to return
> > an error, the core should be prepared to deal with errors returned by
> > it and propagating the error is not always the best choice, like in
> > this particular case.
> >
> >>> The core code has been clean up from the high friction it had with the
> >>> legacy ACPI code. It would be nice to continue it this direction.
> >
> > This isn't really ACPI specific.  Any driver can return an error from
> > .get_cur_state() if it has a good enough reason.
>
> We are talking about registration time, right? If the driver is
> registering too soon, eg. the firmware is not ready, should it fix the
> moment it is registering the cooling device when it is sure the firmware
> completed its initialization ?

OK, so arguably the driver could set the initial state of the cooling
device to 0.  That may or may not be the right thing to do depending
on the thermal state of the system at the moment.  Then it would need
to wait for the governor to pick up a more suitable state for it or
leave it at 0.  This could address the particular case at hand.

However, should the core fail the cooling device registration if it
gets an error from .get_cur_state() to start with?  It didn't do that
before.  Indeed, it didn't even call .get_cur_state() then in the
first place.  Moreover, the current state of the cooling device is not
even needed to register it except for the initialization of the debug
code for that cooling device, so why fail the registration of it?
Daniel Lezcano June 6, 2024, 3:46 p.m. UTC | #8
On 06/06/2024 17:15, Rafael J. Wysocki wrote:

[ ... ]

> OK, so arguably the driver could set the initial state of the cooling
> device to 0.  That may or may not be the right thing to do depending
> on the thermal state of the system at the moment.  Then it would need
> to wait for the governor to pick up a more suitable state for it or
> leave it at 0.  This could address the particular case at hand.
> 
> However, should the core fail the cooling device registration if it
> gets an error from .get_cur_state() to start with?  It didn't do that
> before.  Indeed, it didn't even call .get_cur_state() then in the
> first place.  Moreover, the current state of the cooling device is not
> even needed to register it except for the initialization of the debug
> code for that cooling device, so why fail the registration of it?

Indeed, the simpler way would be to not register the debugfs if we can't 
get the initial cooling device state, with a message, so the driver 
responsible of that will appear and hopefully encourage someone to fix it.
Rafael J. Wysocki June 6, 2024, 3:52 p.m. UTC | #9
On Thu, Jun 6, 2024 at 5:46 PM Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 06/06/2024 17:15, Rafael J. Wysocki wrote:
>
> [ ... ]
>
> > OK, so arguably the driver could set the initial state of the cooling
> > device to 0.  That may or may not be the right thing to do depending
> > on the thermal state of the system at the moment.  Then it would need
> > to wait for the governor to pick up a more suitable state for it or
> > leave it at 0.  This could address the particular case at hand.
> >
> > However, should the core fail the cooling device registration if it
> > gets an error from .get_cur_state() to start with?  It didn't do that
> > before.  Indeed, it didn't even call .get_cur_state() then in the
> > first place.  Moreover, the current state of the cooling device is not
> > even needed to register it except for the initialization of the debug
> > code for that cooling device, so why fail the registration of it?
>
> Indeed, the simpler way would be to not register the debugfs if we can't
> get the initial cooling device state, with a message, so the driver
> responsible of that will appear and hopefully encourage someone to fix it.

OK, that can be done.
diff mbox series

Patch

Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -964,7 +964,8 @@  __thermal_cooling_device_register(struct
 {
 	struct thermal_cooling_device *cdev;
 	struct thermal_zone_device *pos = NULL;
-	unsigned long current_state;
+	unsigned long val;
+	int current_state;
 	int id, ret;
 
 	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
@@ -1002,9 +1003,11 @@  __thermal_cooling_device_register(struct
 	if (ret)
 		goto out_cdev_type;
 
-	ret = cdev->ops->get_cur_state(cdev, &current_state);
-	if (ret)
-		goto out_cdev_type;
+	ret = cdev->ops->get_cur_state(cdev, &val);
+	if (!ret && val >= 0 && val <= INT_MAX)
+		current_state = val;
+	else
+		current_state = -1;
 
 	thermal_cooling_device_setup_sysfs(cdev);
 
Index: linux-pm/drivers/thermal/thermal_debugfs.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_debugfs.c
+++ linux-pm/drivers/thermal/thermal_debugfs.c
@@ -421,6 +421,8 @@  void thermal_debug_cdev_state_update(con
 	cdev_dbg = &thermal_dbg->cdev_dbg;
 
 	old_state = cdev_dbg->current_state;
+	if (old_state < 0)
+		goto unlock;
 
 	/*
 	 * Get the old state information in the durations list. If
@@ -463,6 +465,7 @@  void thermal_debug_cdev_state_update(con
 
 	cdev_dbg->total++;
 
+unlock:
 	mutex_unlock(&thermal_dbg->lock);
 }
 
@@ -499,7 +502,9 @@  void thermal_debug_cdev_add(struct therm
 	 * duration will be printed by cdev_dt_seq_show() as expected if it
 	 * runs before the first state transition.
 	 */
-	thermal_debugfs_cdev_record_get(thermal_dbg, cdev_dbg->durations, state);
+	if (state >= 0)
+		thermal_debugfs_cdev_record_get(thermal_dbg, cdev_dbg->durations,
+						state);
 
 	debugfs_create_file("trans_table", 0400, thermal_dbg->d_top,
 			    thermal_dbg, &tt_fops);