diff mbox series

[v2,3/3] thermal: intel: hfi: Enable interface only when required

Message ID 20240206133605.1518373-4-stanislaw.gruszka@linux.intel.com (mailing list archive)
State Superseded
Headers show
Series thermal/netlink/intel_hfi: Enable HFI feature only when required | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1050 this patch: 1050
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 2 maintainers not CCed: lukasz.luba@arm.com rui.zhang@intel.com
netdev/build_clang success Errors and warnings before: 1066 this patch: 1066
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1067 this patch: 1067
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Stanislaw Gruszka Feb. 6, 2024, 1:36 p.m. UTC
Enable and disable hardware feedback interface (HFI) when user space
handler is present. For example, enable HFI, when intel-speed-select or
Intel Low Power daemon is running and subscribing to thermal netlink
events. When user space handlers exit or remove subscription for
thermal netlink events, disable HFI.

Summary of changes:

- Register a thermal genetlink notifier

- In the notifier, process THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
reason codes to count number of thermal event group netlink multicast
clients. If thermal netlink group has any listener enable HFI on all
packages. If there are no listener disable HFI on all packages.

- When CPU is online, instead of blindly enabling HFI, check if
the thermal netlink group has any listener. This will make sure that
HFI is not enabled by default during boot time.

- Actual processing to enable/disable matches what is done in
suspend/resume callbacks. Create two functions hfi_do_enable()
and hfi_do_disable(), which can be called from  the netlink notifier
callback and suspend/resume callbacks.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
---
 drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
 1 file changed, 85 insertions(+), 10 deletions(-)

Comments

Ricardo Neri Feb. 7, 2024, 3:18 a.m. UTC | #1
On Tue, Feb 06, 2024 at 02:36:05PM +0100, Stanislaw Gruszka wrote:
> Enable and disable hardware feedback interface (HFI) when user space
> handler is present. For example, enable HFI, when intel-speed-select or
> Intel Low Power daemon is running and subscribing to thermal netlink
> events. When user space handlers exit or remove subscription for
> thermal netlink events, disable HFI.
> 
> Summary of changes:
> 
> - Register a thermal genetlink notifier
> 
> - In the notifier, process THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
> reason codes to count number of thermal event group netlink multicast
> clients. If thermal netlink group has any listener enable HFI on all
> packages. If there are no listener disable HFI on all packages.
> 
> - When CPU is online, instead of blindly enabling HFI, check if
> the thermal netlink group has any listener. This will make sure that
> HFI is not enabled by default during boot time.
> 
> - Actual processing to enable/disable matches what is done in
> suspend/resume callbacks. Create two functions hfi_do_enable()
> and hfi_do_disable(), which can be called from  the netlink notifier
> callback and suspend/resume callbacks.
> 
> Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
> ---
>  drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
>  1 file changed, 85 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index 3b04c6ec4fca..5e1e2b5269b7 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -159,6 +159,7 @@ struct hfi_cpu_info {
>  static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
>  
>  static int max_hfi_instances;
> +static int hfi_thermal_clients_num;

Perhaps this counter can be generalized for other clients besides netlink.
KVM could also use it to enable/disable HFI as needed for virtual machines.

Maybe we should expose a function intel_hfi_toggle(bool enable) or a couple
of intel_hfi_enable()/intel_hfi_disable() functions. The former would
increase the counter and enable HFI on all packages. The latter would
decrease the counter and disable HFI if the counter becomes 0.

>  static struct hfi_instance *hfi_instances;
>  
>  static struct hfi_features hfi_features;
> @@ -477,8 +478,11 @@ void intel_hfi_online(unsigned int cpu)
>  enable:
>  	cpumask_set_cpu(cpu, hfi_instance->cpus);
>  
> -	/* Enable this HFI instance if this is its first online CPU. */
> -	if (cpumask_weight(hfi_instance->cpus) == 1) {
> +	/*
> +	 * Enable this HFI instance if this is its first online CPU and
> +	 * there are user-space clients of thermal events.
> +	 */
> +	if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
>  		hfi_set_hw_table(hfi_instance);
>  		hfi_enable();
>  	}
> @@ -573,28 +577,93 @@ static __init int hfi_parse_features(void)
>  	return 0;
>  }
>  
> -static void hfi_do_enable(void)
> +/*
> + * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
> + * callbacks or under protection of hfi_instance_lock.
> + */
> +static void hfi_do_enable(void *ptr)
> +{
> +	struct hfi_instance *hfi_instance = ptr;
> +
> +	hfi_set_hw_table(hfi_instance);
> +	hfi_enable();
> +}
> +
> +static void hfi_do_disable(void *ptr)
> +{
> +	hfi_disable();
> +}
> +
> +static void hfi_syscore_resume(void)
>  {
>  	/* This code runs only on the boot CPU. */
>  	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
>  	struct hfi_instance *hfi_instance = info->hfi_instance;
>  
> -	/* No locking needed. There is no concurrency with CPU online. */
> -	hfi_set_hw_table(hfi_instance);
> -	hfi_enable();
> +	if (hfi_thermal_clients_num > 0)
> +		hfi_do_enable(hfi_instance);
>  }
>  
> -static int hfi_do_disable(void)
> +static int hfi_syscore_suspend(void)
>  {
> -	/* No locking needed. There is no concurrency with CPU offline. */
>  	hfi_disable();
>  
>  	return 0;
>  }
>  
>  static struct syscore_ops hfi_pm_ops = {
> -	.resume = hfi_do_enable,
> -	.suspend = hfi_do_disable,
> +	.resume = hfi_syscore_resume,
> +	.suspend = hfi_syscore_suspend,
> +};
> +
> +static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
> +			      void *_notify)
> +{
> +	struct thermal_genl_notify *notify = _notify;
> +	struct hfi_instance *hfi_instance;
> +	smp_call_func_t func;
> +	unsigned int cpu;
> +	int i;
> +
> +	if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
> +		return NOTIFY_DONE;
> +
> +	if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
> +		return NOTIFY_DONE;
> +
> +	mutex_lock(&hfi_instance_lock);
> +
> +	switch (state) {
> +	case THERMAL_NOTIFY_BIND:
> +		hfi_thermal_clients_num++;
> +		break;

Perhaps here you could call intel_hfi_enable()

> +	case THERMAL_NOTIFY_UNBIND:
> +		hfi_thermal_clients_num--;
> +		break;
> +	}

and here intel_hfi_disable().

> +
> +	if (hfi_thermal_clients_num > 0)
> +		func = hfi_do_enable;
> +	else
> +		func = hfi_do_disable;
> +
> +	for (i = 0; i < max_hfi_instances; i++) {
> +		hfi_instance = &hfi_instances[i];
> +		if (cpumask_empty(hfi_instance->cpus))
> +			continue;
> +
> +		cpu = cpumask_any(hfi_instance->cpus);
> +		smp_call_function_single(cpu, func, hfi_instance, true);
> +	}

This block would go in a helper function.

I know this is beyond the scope of the patchset but it would make the
logic more generic for other clients to use.
> +
> +	mutex_unlock(&hfi_instance_lock);
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block hfi_thermal_nb = {
> +	.notifier_call = hfi_thermal_notify,
>  };
>  
>  void __init intel_hfi_init(void)
> @@ -628,10 +697,16 @@ void __init intel_hfi_init(void)
>  	if (!hfi_updates_wq)
>  		goto err_nomem;
>  
> +	if (thermal_genl_register_notifier(&hfi_thermal_nb))
> +		goto err_nl_notif;
> +
>  	register_syscore_ops(&hfi_pm_ops);
>  
>  	return;
>  
> +err_nl_notif:
> +	destroy_workqueue(hfi_updates_wq);
> +
>  err_nomem:
>  	for (j = 0; j < i; ++j) {
>  		hfi_instance = &hfi_instances[j];
> -- 
> 2.34.1
>
Stanislaw Gruszka Feb. 7, 2024, 11:47 a.m. UTC | #2
On Tue, Feb 06, 2024 at 07:18:54PM -0800, Ricardo Neri wrote:
> On Tue, Feb 06, 2024 at 02:36:05PM +0100, Stanislaw Gruszka wrote:
> > Enable and disable hardware feedback interface (HFI) when user space
> > handler is present. For example, enable HFI, when intel-speed-select or
> > Intel Low Power daemon is running and subscribing to thermal netlink
> > events. When user space handlers exit or remove subscription for
> > thermal netlink events, disable HFI.
> > 
> > Summary of changes:
> > 
> > - Register a thermal genetlink notifier
> > 
> > - In the notifier, process THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
> > reason codes to count number of thermal event group netlink multicast
> > clients. If thermal netlink group has any listener enable HFI on all
> > packages. If there are no listener disable HFI on all packages.
> > 
> > - When CPU is online, instead of blindly enabling HFI, check if
> > the thermal netlink group has any listener. This will make sure that
> > HFI is not enabled by default during boot time.
> > 
> > - Actual processing to enable/disable matches what is done in
> > suspend/resume callbacks. Create two functions hfi_do_enable()
> > and hfi_do_disable(), which can be called from  the netlink notifier
> > callback and suspend/resume callbacks.
> > 
> > Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
> > ---
> >  drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
> >  1 file changed, 85 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > index 3b04c6ec4fca..5e1e2b5269b7 100644
> > --- a/drivers/thermal/intel/intel_hfi.c
> > +++ b/drivers/thermal/intel/intel_hfi.c
> > @@ -159,6 +159,7 @@ struct hfi_cpu_info {
> >  static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
> >  
> >  static int max_hfi_instances;
> > +static int hfi_thermal_clients_num;
> 
> Perhaps this counter can be generalized for other clients besides netlink.
> KVM could also use it to enable/disable HFI as needed for virtual machines.

Probably it will be useful for other clients, however that will depend
how (KVM) callback/notification would be implemented. For now I would
stick with thermal_clients_num, name can be always changed.

> Maybe we should expose a function intel_hfi_toggle(bool enable) or a couple
I think toggle(true) / toggle(false) is less understandable than
enable() / disable(). Generally "toggle" means switch to opposite
state.

> of intel_hfi_enable()/intel_hfi_disable() functions. The former would
> increase the counter and enable HFI on all packages. The latter would
> decrease the counter and disable HFI if the counter becomes 0.

I would prefer not to do this, as well as other similar things you
requested below, in this patchset. Those can be done as separate
cleanup patch on top of this set. If there is benefit of such refactors
it will be clean seen then.
 
Regards
Stanislaw


> >  static struct hfi_instance *hfi_instances;
> >  
> >  static struct hfi_features hfi_features;
> > @@ -477,8 +478,11 @@ void intel_hfi_online(unsigned int cpu)
> >  enable:
> >  	cpumask_set_cpu(cpu, hfi_instance->cpus);
> >  
> > -	/* Enable this HFI instance if this is its first online CPU. */
> > -	if (cpumask_weight(hfi_instance->cpus) == 1) {
> > +	/*
> > +	 * Enable this HFI instance if this is its first online CPU and
> > +	 * there are user-space clients of thermal events.
> > +	 */
> > +	if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
> >  		hfi_set_hw_table(hfi_instance);
> >  		hfi_enable();
> >  	}
> > @@ -573,28 +577,93 @@ static __init int hfi_parse_features(void)
> >  	return 0;
> >  }
> >  
> > -static void hfi_do_enable(void)
> > +/*
> > + * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
> > + * callbacks or under protection of hfi_instance_lock.
> > + */
> > +static void hfi_do_enable(void *ptr)
> > +{
> > +	struct hfi_instance *hfi_instance = ptr;
> > +
> > +	hfi_set_hw_table(hfi_instance);
> > +	hfi_enable();
> > +}
> > +
> > +static void hfi_do_disable(void *ptr)
> > +{
> > +	hfi_disable();
> > +}
> > +
> > +static void hfi_syscore_resume(void)
> >  {
> >  	/* This code runs only on the boot CPU. */
> >  	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
> >  	struct hfi_instance *hfi_instance = info->hfi_instance;
> >  
> > -	/* No locking needed. There is no concurrency with CPU online. */
> > -	hfi_set_hw_table(hfi_instance);
> > -	hfi_enable();
> > +	if (hfi_thermal_clients_num > 0)
> > +		hfi_do_enable(hfi_instance);
> >  }
> >  
> > -static int hfi_do_disable(void)
> > +static int hfi_syscore_suspend(void)
> >  {
> > -	/* No locking needed. There is no concurrency with CPU offline. */
> >  	hfi_disable();
> >  
> >  	return 0;
> >  }
> >  
> >  static struct syscore_ops hfi_pm_ops = {
> > -	.resume = hfi_do_enable,
> > -	.suspend = hfi_do_disable,
> > +	.resume = hfi_syscore_resume,
> > +	.suspend = hfi_syscore_suspend,
> > +};
> > +
> > +static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
> > +			      void *_notify)
> > +{
> > +	struct thermal_genl_notify *notify = _notify;
> > +	struct hfi_instance *hfi_instance;
> > +	smp_call_func_t func;
> > +	unsigned int cpu;
> > +	int i;
> > +
> > +	if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
> > +		return NOTIFY_DONE;
> > +
> > +	if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
> > +		return NOTIFY_DONE;
> > +
> > +	mutex_lock(&hfi_instance_lock);
> > +
> > +	switch (state) {
> > +	case THERMAL_NOTIFY_BIND:
> > +		hfi_thermal_clients_num++;
> > +		break;
> 
> Perhaps here you could call intel_hfi_enable()
> 
> > +	case THERMAL_NOTIFY_UNBIND:
> > +		hfi_thermal_clients_num--;
> > +		break;
> > +	}
> 
> and here intel_hfi_disable().
> 
> > +
> > +	if (hfi_thermal_clients_num > 0)
> > +		func = hfi_do_enable;
> > +	else
> > +		func = hfi_do_disable;
> > +
> > +	for (i = 0; i < max_hfi_instances; i++) {
> > +		hfi_instance = &hfi_instances[i];
> > +		if (cpumask_empty(hfi_instance->cpus))
> > +			continue;
> > +
> > +		cpu = cpumask_any(hfi_instance->cpus);
> > +		smp_call_function_single(cpu, func, hfi_instance, true);
> > +	}
> 
> This block would go in a helper function.
> 
> I know this is beyond the scope of the patchset but it would make the
> logic more generic for other clients to use.
> > +
> > +	mutex_unlock(&hfi_instance_lock);
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > +static struct notifier_block hfi_thermal_nb = {
> > +	.notifier_call = hfi_thermal_notify,
> >  };
> >  
> >  void __init intel_hfi_init(void)
> > @@ -628,10 +697,16 @@ void __init intel_hfi_init(void)
> >  	if (!hfi_updates_wq)
> >  		goto err_nomem;
> >  
> > +	if (thermal_genl_register_notifier(&hfi_thermal_nb))
> > +		goto err_nl_notif;
> > +
> >  	register_syscore_ops(&hfi_pm_ops);
> >  
> >  	return;
> >  
> > +err_nl_notif:
> > +	destroy_workqueue(hfi_updates_wq);
> > +
> >  err_nomem:
> >  	for (j = 0; j < i; ++j) {
> >  		hfi_instance = &hfi_instances[j];
> > -- 
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 3b04c6ec4fca..5e1e2b5269b7 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -159,6 +159,7 @@  struct hfi_cpu_info {
 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
 
 static int max_hfi_instances;
+static int hfi_thermal_clients_num;
 static struct hfi_instance *hfi_instances;
 
 static struct hfi_features hfi_features;
@@ -477,8 +478,11 @@  void intel_hfi_online(unsigned int cpu)
 enable:
 	cpumask_set_cpu(cpu, hfi_instance->cpus);
 
-	/* Enable this HFI instance if this is its first online CPU. */
-	if (cpumask_weight(hfi_instance->cpus) == 1) {
+	/*
+	 * Enable this HFI instance if this is its first online CPU and
+	 * there are user-space clients of thermal events.
+	 */
+	if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
 		hfi_set_hw_table(hfi_instance);
 		hfi_enable();
 	}
@@ -573,28 +577,93 @@  static __init int hfi_parse_features(void)
 	return 0;
 }
 
-static void hfi_do_enable(void)
+/*
+ * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
+ * callbacks or under protection of hfi_instance_lock.
+ */
+static void hfi_do_enable(void *ptr)
+{
+	struct hfi_instance *hfi_instance = ptr;
+
+	hfi_set_hw_table(hfi_instance);
+	hfi_enable();
+}
+
+static void hfi_do_disable(void *ptr)
+{
+	hfi_disable();
+}
+
+static void hfi_syscore_resume(void)
 {
 	/* This code runs only on the boot CPU. */
 	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
 	struct hfi_instance *hfi_instance = info->hfi_instance;
 
-	/* No locking needed. There is no concurrency with CPU online. */
-	hfi_set_hw_table(hfi_instance);
-	hfi_enable();
+	if (hfi_thermal_clients_num > 0)
+		hfi_do_enable(hfi_instance);
 }
 
-static int hfi_do_disable(void)
+static int hfi_syscore_suspend(void)
 {
-	/* No locking needed. There is no concurrency with CPU offline. */
 	hfi_disable();
 
 	return 0;
 }
 
 static struct syscore_ops hfi_pm_ops = {
-	.resume = hfi_do_enable,
-	.suspend = hfi_do_disable,
+	.resume = hfi_syscore_resume,
+	.suspend = hfi_syscore_suspend,
+};
+
+static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
+			      void *_notify)
+{
+	struct thermal_genl_notify *notify = _notify;
+	struct hfi_instance *hfi_instance;
+	smp_call_func_t func;
+	unsigned int cpu;
+	int i;
+
+	if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
+		return NOTIFY_DONE;
+
+	if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
+		return NOTIFY_DONE;
+
+	mutex_lock(&hfi_instance_lock);
+
+	switch (state) {
+	case THERMAL_NOTIFY_BIND:
+		hfi_thermal_clients_num++;
+		break;
+
+	case THERMAL_NOTIFY_UNBIND:
+		hfi_thermal_clients_num--;
+		break;
+	}
+
+	if (hfi_thermal_clients_num > 0)
+		func = hfi_do_enable;
+	else
+		func = hfi_do_disable;
+
+	for (i = 0; i < max_hfi_instances; i++) {
+		hfi_instance = &hfi_instances[i];
+		if (cpumask_empty(hfi_instance->cpus))
+			continue;
+
+		cpu = cpumask_any(hfi_instance->cpus);
+		smp_call_function_single(cpu, func, hfi_instance, true);
+	}
+
+	mutex_unlock(&hfi_instance_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hfi_thermal_nb = {
+	.notifier_call = hfi_thermal_notify,
 };
 
 void __init intel_hfi_init(void)
@@ -628,10 +697,16 @@  void __init intel_hfi_init(void)
 	if (!hfi_updates_wq)
 		goto err_nomem;
 
+	if (thermal_genl_register_notifier(&hfi_thermal_nb))
+		goto err_nl_notif;
+
 	register_syscore_ops(&hfi_pm_ops);
 
 	return;
 
+err_nl_notif:
+	destroy_workqueue(hfi_updates_wq);
+
 err_nomem:
 	for (j = 0; j < i; ++j) {
 		hfi_instance = &hfi_instances[j];