diff mbox series

wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded

Message ID 20230710104626.8399-1-hpausten@protonmail.com (mailing list archive)
State Handled Elsewhere
Delegated to: Miri Korenblit
Headers show
Series wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded | expand

Commit Message

Harry Austen July 10, 2023, 10:47 a.m. UTC
In iwl_mvm_thermal_zone_register(), when registering a thermal zone, the
thermal subsystem will evaluate its temperature.
But iwl_mvm_tzone_get_temp() fails at this time because
iwl_mvm_firmware_running() returns false.
And that's why many users report that they see
"thermal thermal_zoneX: failed to read out thermal zone (-61)"
message during wifi driver probing.

This patch attempts to fix this by delaying enabling of the thermal zone
until after the firmware has been loaded/initialized. It also gets
disabled when going into suspend.

Signed-off-by: Harry Austen <hpausten@protonmail.com>
---
 .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18 ++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
 2 files changed, 19 insertions(+), 8 deletions(-)

--
2.41.0

Comments

Daniel Lezcano July 13, 2023, 9:41 a.m. UTC | #1
On 10/07/2023 12:47, Harry Austen wrote:
> In iwl_mvm_thermal_zone_register(), when registering a thermal zone, the
> thermal subsystem will evaluate its temperature.
> But iwl_mvm_tzone_get_temp() fails at this time because
> iwl_mvm_firmware_running() returns false.
> And that's why many users report that they see
> "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> message during wifi driver probing.
> 
> This patch attempts to fix this by delaying enabling of the thermal zone
> until after the firmware has been loaded/initialized. It also gets
> disabled when going into suspend.

Thanks for taking care of this bug.

The thermal zone will be accessible from userspace and can be enabled 
manually. The resulting temperature read error will be the same in this 
case.

IMO, it is cleaner to actually [un]register the thermal zone when the 
firmware is [un]loaded

> Signed-off-by: Harry Austen <hpausten@protonmail.com>
> ---
>   .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18 ++++++++++++++++++
>   drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
>   2 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> index ce7905faa08f..a47d29a64dd4 100644
> --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw *hw)
> 
>   	mutex_unlock(&mvm->mutex);
> 
> +#ifdef CONFIG_THERMAL
> +	/* Needs to be done outside of mutex guarded section to prevent deadlock, since enabling
> +	 * the thermal zone calls the .get_temp() callback, which attempts to acquire the mutex.
> +	 */
> +	if (!ret) {
> +		ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> +		if (ret)
> +			IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone (err = %d)\n", ret);
> +	}
> +#endif
> +
>   	iwl_mvm_mei_set_sw_rfkill_state(mvm);
> 
>   	return ret;
> @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
>   void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
>   {
>   	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> +	int ret;
> 
>   	flush_work(&mvm->async_handlers_wk);
>   	flush_work(&mvm->add_stream_wk);
> @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> 
>   	iwl_mvm_mei_set_sw_rfkill_state(mvm);
> 
> +#ifdef CONFIG_THERMAL
> +	ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> +	if (ret)
> +		IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone (err = %d)\n", ret);
> +#endif
> +
>   	mutex_lock(&mvm->mutex);
>   	__iwl_mvm_mac_stop(mvm);
>   	mutex_unlock(&mvm->mutex);
> diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> index 157e96fa23c1..964d2d011c6b 100644
> --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> @@ -680,7 +680,7 @@ static  struct thermal_zone_device_ops tzone_ops = {
> 
>   static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
>   {
> -	int i, ret;
> +	int i;
>   	char name[16];
>   	static atomic_t counter = ATOMIC_INIT(0);
> 
> @@ -707,13 +707,6 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
>   		return;
>   	}
> 
> -	ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> -	if (ret) {
> -		IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone\n");
> -		thermal_zone_device_unregister(mvm->tz_device.tzone);
> -		return;
> -	}
> -
>   	/* 0 is a valid temperature,
>   	 * so initialize the array with S16_MIN which invalid temperature
>   	 */
> --
> 2.41.0
> 
>
Zhang, Rui July 16, 2023, 1:45 p.m. UTC | #2
On Thu, 2023-07-13 at 11:41 +0200, Daniel Lezcano wrote:
> On 10/07/2023 12:47, Harry Austen wrote:
> > In iwl_mvm_thermal_zone_register(), when registering a thermal
> > zone, the
> > thermal subsystem will evaluate its temperature.
> > But iwl_mvm_tzone_get_temp() fails at this time because
> > iwl_mvm_firmware_running() returns false.
> > And that's why many users report that they see
> > "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> > message during wifi driver probing.
> > 
> > This patch attempts to fix this by delaying enabling of the thermal
> > zone
> > until after the firmware has been loaded/initialized. It also gets
> > disabled when going into suspend.
> 
> Thanks for taking care of this bug.
> 
> The thermal zone will be accessible from userspace

Currently, I see that the mode is checked only in
__thermal_zone_device_update().

should we limit the userspace access for disabled thermal zone? For
example, return failure when reading 'temp' sysfs attribute.

>  and can be enabled
> manually.

maybe we should have a .change_mode() callback and return failure when
enabling the thermal zone with wifi firmware unloaded.

>  The resulting temperature read error will be the same in this 
> case.
> 
> IMO, it is cleaner to actually [un]register the thermal zone when the
> firmware is [un]loaded

Austen,
may I know how often is this wifi firmware loaded/unloaded?

thanks,
rui
> 
> > Signed-off-by: Harry Austen <hpausten@protonmail.com>
> > ---
> >   .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18
> > ++++++++++++++++++
> >   drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
> >   2 files changed, 19 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > index ce7905faa08f..a47d29a64dd4 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw
> > *hw)
> > 
> >         mutex_unlock(&mvm->mutex);
> > 
> > +#ifdef CONFIG_THERMAL
> > +       /* Needs to be done outside of mutex guarded section to
> > prevent deadlock, since enabling
> > +        * the thermal zone calls the .get_temp() callback, which
> > attempts to acquire the mutex.
> > +        */
> > +       if (!ret) {
> > +               ret = thermal_zone_device_enable(mvm-
> > >tz_device.tzone);
> > +               if (ret)
> > +                       IWL_DEBUG_TEMP(mvm, "Failed to enable
> > thermal zone (err = %d)\n", ret);
> > +       }
> > +#endif
> > +
> >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> > 
> >         return ret;
> > @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
> >   void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> >   {
> >         struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> > +       int ret;
> > 
> >         flush_work(&mvm->async_handlers_wk);
> >         flush_work(&mvm->add_stream_wk);
> > @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw
> > *hw)
> > 
> >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> > 
> > +#ifdef CONFIG_THERMAL
> > +       ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> > +       if (ret)
> > +               IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone
> > (err = %d)\n", ret);
> > +#endif
> > +
> >         mutex_lock(&mvm->mutex);
> >         __iwl_mvm_mac_stop(mvm);
> >         mutex_unlock(&mvm->mutex);
> > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > index 157e96fa23c1..964d2d011c6b 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > @@ -680,7 +680,7 @@ static  struct thermal_zone_device_ops
> > tzone_ops = {
> > 
> >   static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> >   {
> > -       int i, ret;
> > +       int i;
> >         char name[16];
> >         static atomic_t counter = ATOMIC_INIT(0);
> > 
> > @@ -707,13 +707,6 @@ static void
> > iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> >                 return;
> >         }
> > 
> > -       ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> > -       if (ret) {
> > -               IWL_DEBUG_TEMP(mvm, "Failed to enable thermal
> > zone\n");
> > -               thermal_zone_device_unregister(mvm-
> > >tz_device.tzone);
> > -               return;
> > -       }
> > -
> >         /* 0 is a valid temperature,
> >          * so initialize the array with S16_MIN which invalid
> > temperature
> >          */
> > --
> > 2.41.0
> > 
> > 
>
Harry Austen July 23, 2023, 9:10 p.m. UTC | #3
On Sun Jul 16, 2023 at 2:45 PM BST, Zhang, Rui wrote:
> On Thu, 2023-07-13 at 11:41 +0200, Daniel Lezcano wrote:
> > On 10/07/2023 12:47, Harry Austen wrote:
> > > In iwl_mvm_thermal_zone_register(), when registering a thermal
> > > zone, the
> > > thermal subsystem will evaluate its temperature.
> > > But iwl_mvm_tzone_get_temp() fails at this time because
> > > iwl_mvm_firmware_running() returns false.
> > > And that's why many users report that they see
> > > "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> > > message during wifi driver probing.
> > >
> > > This patch attempts to fix this by delaying enabling of the thermal
> > > zone
> > > until after the firmware has been loaded/initialized. It also gets
> > > disabled when going into suspend.
> >
> > Thanks for taking care of this bug.
> >
> > The thermal zone will be accessible from userspace
>
> Currently, I see that the mode is checked only in
> __thermal_zone_device_update().
>
> should we limit the userspace access for disabled thermal zone? For
> example, return failure when reading 'temp' sysfs attribute.

Ah okay thanks for checking. Yes, agree that is probably a more sensible behaviour.

>
> >  and can be enabled
> > manually.
>
> maybe we should have a .change_mode() callback and return failure when
> enabling the thermal zone with wifi firmware unloaded.
>
> >  The resulting temperature read error will be the same in this
> > case.
> >
> > IMO, it is cleaner to actually [un]register the thermal zone when the
> > firmware is [un]loaded
>
> Austen,
> may I know how often is this wifi firmware loaded/unloaded?

Personally, I have only ever seen the firmware loaded once on boot and never unloaded. That was the reason for my patch;
I was trying to reduce the number of kernel warning/error log messages on my system during boot. As far as I can tell,
the ieee80211_ops stop() callback is only called in drv_stop(), which for example can be called via ieee80211_suspend().

>
> thanks,
> rui
> >
> > > Signed-off-by: Harry Austen <hpausten@protonmail.com>
> > > ---
> > >   .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18
> > > ++++++++++++++++++
> > >   drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
> > >   2 files changed, 19 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > index ce7905faa08f..a47d29a64dd4 100644
> > > --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw
> > > *hw)
> > >
> > >         mutex_unlock(&mvm->mutex);
> > >
> > > +#ifdef CONFIG_THERMAL
> > > +       /* Needs to be done outside of mutex guarded section to
> > > prevent deadlock, since enabling
> > > +        * the thermal zone calls the .get_temp() callback, which
> > > attempts to acquire the mutex.
> > > +        */
> > > +       if (!ret) {
> > > +               ret = thermal_zone_device_enable(mvm-
> > > >tz_device.tzone);
> > > +               if (ret)
> > > +                       IWL_DEBUG_TEMP(mvm, "Failed to enable
> > > thermal zone (err = %d)\n", ret);
> > > +       }
> > > +#endif
> > > +
> > >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> > >
> > >         return ret;
> > > @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
> > >   void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> > >   {
> > >         struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> > > +       int ret;
> > >
> > >         flush_work(&mvm->async_handlers_wk);
> > >         flush_work(&mvm->add_stream_wk);
> > > @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw
> > > *hw)
> > >
> > >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> > >
> > > +#ifdef CONFIG_THERMAL
> > > +       ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> > > +       if (ret)
> > > +               IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone
> > > (err = %d)\n", ret);
> > > +#endif
> > > +
> > >         mutex_lock(&mvm->mutex);
> > >         __iwl_mvm_mac_stop(mvm);
> > >         mutex_unlock(&mvm->mutex);
> > > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > index 157e96fa23c1..964d2d011c6b 100644
> > > --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > @@ -680,7 +680,7 @@ static  struct thermal_zone_device_ops
> > > tzone_ops = {
> > >
> > >   static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> > >   {
> > > -       int i, ret;
> > > +       int i;
> > >         char name[16];
> > >         static atomic_t counter = ATOMIC_INIT(0);
> > >
> > > @@ -707,13 +707,6 @@ static void
> > > iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> > >                 return;
> > >         }
> > >
> > > -       ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> > > -       if (ret) {
> > > -               IWL_DEBUG_TEMP(mvm, "Failed to enable thermal
> > > zone\n");
> > > -               thermal_zone_device_unregister(mvm-
> > > >tz_device.tzone);
> > > -               return;
> > > -       }
> > > -
> > >         /* 0 is a valid temperature,
> > >          * so initialize the array with S16_MIN which invalid
> > > temperature
> > >          */
> > > --
> > > 2.41.0
> > >
> > >
> >

Thanks for the review! :)
Harry
diff mbox series

Patch

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index ce7905faa08f..a47d29a64dd4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1187,6 +1187,17 @@  int iwl_mvm_mac_start(struct ieee80211_hw *hw)

 	mutex_unlock(&mvm->mutex);

+#ifdef CONFIG_THERMAL
+	/* Needs to be done outside of mutex guarded section to prevent deadlock, since enabling
+	 * the thermal zone calls the .get_temp() callback, which attempts to acquire the mutex.
+	 */
+	if (!ret) {
+		ret = thermal_zone_device_enable(mvm->tz_device.tzone);
+		if (ret)
+			IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone (err = %d)\n", ret);
+	}
+#endif
+
 	iwl_mvm_mei_set_sw_rfkill_state(mvm);

 	return ret;
@@ -1282,6 +1293,7 @@  void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
 void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+	int ret;

 	flush_work(&mvm->async_handlers_wk);
 	flush_work(&mvm->add_stream_wk);
@@ -1307,6 +1319,12 @@  void iwl_mvm_mac_stop(struct ieee80211_hw *hw)

 	iwl_mvm_mei_set_sw_rfkill_state(mvm);

+#ifdef CONFIG_THERMAL
+	ret = thermal_zone_device_disable(mvm->tz_device.tzone);
+	if (ret)
+		IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone (err = %d)\n", ret);
+#endif
+
 	mutex_lock(&mvm->mutex);
 	__iwl_mvm_mac_stop(mvm);
 	mutex_unlock(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index 157e96fa23c1..964d2d011c6b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -680,7 +680,7 @@  static  struct thermal_zone_device_ops tzone_ops = {

 static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
 {
-	int i, ret;
+	int i;
 	char name[16];
 	static atomic_t counter = ATOMIC_INIT(0);

@@ -707,13 +707,6 @@  static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
 		return;
 	}

-	ret = thermal_zone_device_enable(mvm->tz_device.tzone);
-	if (ret) {
-		IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone\n");
-		thermal_zone_device_unregister(mvm->tz_device.tzone);
-		return;
-	}
-
 	/* 0 is a valid temperature,
 	 * so initialize the array with S16_MIN which invalid temperature
 	 */