[v4,3/3] thermal: intel: hfi: Enable interface only when required

Message ID 20240212161615.161935-4-stanislaw.gruszka@linux.intel.com (mailing list archive)
State Changes Requested, archived
Series thermal/netlink/intel_hfi: Enable HFI feature only when required

Commit Message

Stanislaw Gruszka Feb. 12, 2024, 4:16 p.m. UTC
Enable and disable the hardware feedback interface (HFI) when a user space
handler is present. For example, enable HFI when intel-speed-select or the
Intel Low Power daemon is running and subscribed to thermal netlink
events. When the user space handlers exit or remove their subscription to
thermal netlink events, disable HFI.

Summary of changes:

- Register a thermal genetlink notifier

- In the notifier, process the THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
reason codes to count the number of thermal event group netlink multicast
clients. If the thermal netlink group has any listeners, enable HFI on all
packages. If there are no listeners, disable HFI on all packages.

- When a CPU comes online, instead of blindly enabling HFI, check if
the thermal netlink group has any listeners. This makes sure that
HFI is not enabled by default during boot.

- The actual enable/disable processing matches what is done in the
suspend/resume callbacks. Create two functions, hfi_do_enable()
and hfi_do_disable(), which can be called from the netlink notifier
callback and the suspend/resume callbacks.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
---
 drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
 1 file changed, 85 insertions(+), 10 deletions(-)

Comments

Rafael J. Wysocki Feb. 13, 2024, 1:59 p.m. UTC | #1
On Mon, Feb 12, 2024 at 5:16 PM Stanislaw Gruszka
<stanislaw.gruszka@linux.intel.com> wrote:
>
> Enable and disable the hardware feedback interface (HFI) when a user space
> handler is present. For example, enable HFI when intel-speed-select or the
> Intel Low Power daemon is running and subscribed to thermal netlink
> events. When the user space handlers exit or remove their subscription to
> thermal netlink events, disable HFI.
>
> Summary of changes:
>
> - Register a thermal genetlink notifier
>
> - In the notifier, process the THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
> reason codes to count the number of thermal event group netlink multicast
> clients. If the thermal netlink group has any listeners, enable HFI on all
> packages. If there are no listeners, disable HFI on all packages.
>
> - When a CPU comes online, instead of blindly enabling HFI, check if
> the thermal netlink group has any listeners. This makes sure that
> HFI is not enabled by default during boot.
>
> - The actual enable/disable processing matches what is done in the
> suspend/resume callbacks. Create two functions, hfi_do_enable()
> and hfi_do_disable(), which can be called from the netlink notifier
> callback and the suspend/resume callbacks.
>
> Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
> ---
>  drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
>  1 file changed, 85 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index 3b04c6ec4fca..5e1e2b5269b7 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -159,6 +159,7 @@ struct hfi_cpu_info {
>  static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
>
>  static int max_hfi_instances;
> +static int hfi_thermal_clients_num;
>  static struct hfi_instance *hfi_instances;
>
>  static struct hfi_features hfi_features;
> @@ -477,8 +478,11 @@ void intel_hfi_online(unsigned int cpu)
>  enable:
>         cpumask_set_cpu(cpu, hfi_instance->cpus);
>
> -       /* Enable this HFI instance if this is its first online CPU. */
> -       if (cpumask_weight(hfi_instance->cpus) == 1) {
> +       /*
> +        * Enable this HFI instance if this is its first online CPU and
> +        * there are user-space clients of thermal events.
> +        */
> +       if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
>                 hfi_set_hw_table(hfi_instance);
>                 hfi_enable();
>         }
> @@ -573,28 +577,93 @@ static __init int hfi_parse_features(void)
>         return 0;
>  }
>
> -static void hfi_do_enable(void)
> +/*
> + * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
> + * callbacks or under protection of hfi_instance_lock.
> + */

In the comment above I would say "If concurrency is not prevented by
other means, the HFI enable/disable routines must be called under
hfi_instance_lock." and I would retain the comments below (they don't
hurt IMO).
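
I.e. as a block comment above the helpers, something like:

/*
 * If concurrency is not prevented by other means, the HFI enable/disable
 * routines must be called under hfi_instance_lock.
 */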

> +static void hfi_do_enable(void *ptr)

I would call this hfi_enable_instance().

> +{
> +       struct hfi_instance *hfi_instance = ptr;

Why is this variable needed or even useful?  ptr can be passed
directly to hfi_set_hw_table().

> +
> +       hfi_set_hw_table(hfi_instance);
> +       hfi_enable();
> +}
> +
> +static void hfi_do_disable(void *ptr)

And I'd call this hfi_disable_instance().

> +{
> +       hfi_disable();
> +}
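
With both renames and ptr used directly, I'd expect the two helpers to
end up roughly like this (just a sketch):

static void hfi_enable_instance(void *ptr)
{
	hfi_set_hw_table(ptr);
	hfi_enable();
}

static void hfi_disable_instance(void *ptr)
{
	hfi_disable();
}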
> +
> +static void hfi_syscore_resume(void)
>  {
>         /* This code runs only on the boot CPU. */
>         struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
>         struct hfi_instance *hfi_instance = info->hfi_instance;
>
> -       /* No locking needed. There is no concurrency with CPU online. */
> -       hfi_set_hw_table(hfi_instance);
> -       hfi_enable();
> +       if (hfi_thermal_clients_num > 0)
> +               hfi_do_enable(hfi_instance);
>  }
>
> -static int hfi_do_disable(void)
> +static int hfi_syscore_suspend(void)
>  {
> -       /* No locking needed. There is no concurrency with CPU offline. */
>         hfi_disable();
>
>         return 0;
>  }
>
>  static struct syscore_ops hfi_pm_ops = {
> -       .resume = hfi_do_enable,
> -       .suspend = hfi_do_disable,
> +       .resume = hfi_syscore_resume,
> +       .suspend = hfi_syscore_suspend,
> +};
> +
> +static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
> +                             void *_notify)
> +{
> +       struct thermal_genl_notify *notify = _notify;
> +       struct hfi_instance *hfi_instance;
> +       smp_call_func_t func;
> +       unsigned int cpu;
> +       int i;
> +
> +       if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
> +               return NOTIFY_DONE;
> +
> +       if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
> +               return NOTIFY_DONE;
> +
> +       mutex_lock(&hfi_instance_lock);
> +
> +       switch (state) {
> +       case THERMAL_NOTIFY_BIND:
> +               hfi_thermal_clients_num++;
> +               break;
> +
> +       case THERMAL_NOTIFY_UNBIND:
> +               hfi_thermal_clients_num--;
> +               break;
> +       }
> +
> +       if (hfi_thermal_clients_num > 0)
> +               func = hfi_do_enable;
> +       else
> +               func = hfi_do_disable;
> +
> +       for (i = 0; i < max_hfi_instances; i++) {
> +               hfi_instance = &hfi_instances[i];
> +               if (cpumask_empty(hfi_instance->cpus))
> +                       continue;
> +
> +               cpu = cpumask_any(hfi_instance->cpus);
> +               smp_call_function_single(cpu, func, hfi_instance, true);
> +       }
> +
> +       mutex_unlock(&hfi_instance_lock);

So AFAICS, one instance can be enabled multiple times because of this.
  I guess that's OK?  In any case, it would be kind of nice to leave a
note regarding it somewhere here.
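
Even a one-line comment above the smp_call_function_single() loop would
be enough, e.g. something like:

	/* An instance may be re-enabled here even if it is already enabled. */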

> +
> +       return NOTIFY_OK;
> +}
> +
> +static struct notifier_block hfi_thermal_nb = {
> +       .notifier_call = hfi_thermal_notify,
>  };
>
>  void __init intel_hfi_init(void)
> @@ -628,10 +697,16 @@ void __init intel_hfi_init(void)
>         if (!hfi_updates_wq)
>                 goto err_nomem;
>
> +       if (thermal_genl_register_notifier(&hfi_thermal_nb))
> +               goto err_nl_notif;

Is it possible for any clients to be there before the notifier is
registered?  If not, it would be good to add a comment about it.

> +
>         register_syscore_ops(&hfi_pm_ops);
>
>         return;
>
> +err_nl_notif:
> +       destroy_workqueue(hfi_updates_wq);
> +
>  err_nomem:
>         for (j = 0; j < i; ++j) {
>                 hfi_instance = &hfi_instances[j];
> --
Stanislaw Gruszka Feb. 22, 2024, 4:53 p.m. UTC | #2
On Tue, Feb 13, 2024 at 02:59:10PM +0100, Rafael J. Wysocki wrote:
> > -static void hfi_do_enable(void)
> > +/*
> > + * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
> > + * callbacks or under protection of hfi_instance_lock.
> > + */
> 
> In the comment above I would say "If concurrency is not prevented by
> other means, the HFI enable/disable routines must be called under
> hfi_instance_lock." 

Ok. Will reword this way.

> and I would retain the comments below (they don't
> hurt IMO).

I found those comments somewhat confusing. From what I can tell, at worst
what can happen when enable/resume races with CPU online and
disable/suspend races with CPU offline is a double enable
or a double disable. Which I think is fine, though I plan to
check this (see below).

> > +static void hfi_do_enable(void *ptr)
> 
> I would call this hfi_enable_instance().
> 
> > +{
> > +       struct hfi_instance *hfi_instance = ptr;
> 
> Why is this variable needed or even useful?  ptr can be passed
> directly to hfi_set_hw_table().

Ok, will remove it.

> > +
> > +       hfi_set_hw_table(hfi_instance);
> > +       hfi_enable();
> > +}
> > +
> > +static void hfi_do_disable(void *ptr)
> 
> And I'd call this hfi_disable_instance().

Ok.

> > +static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
> > +                             void *_notify)
> > +{
> > +       struct thermal_genl_notify *notify = _notify;
> > +       struct hfi_instance *hfi_instance;
> > +       smp_call_func_t func;
> > +       unsigned int cpu;
> > +       int i;
> > +
> > +       if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
> > +               return NOTIFY_DONE;
> > +
> > +       if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
> > +               return NOTIFY_DONE;
> > +
> > +       mutex_lock(&hfi_instance_lock);
> > +
> > +       switch (state) {
> > +       case THERMAL_NOTIFY_BIND:
> > +               hfi_thermal_clients_num++;
> > +               break;
> > +
> > +       case THERMAL_NOTIFY_UNBIND:
> > +               hfi_thermal_clients_num--;
> > +               break;
> > +       }
> > +
> > +       if (hfi_thermal_clients_num > 0)
> > +               func = hfi_do_enable;
> > +       else
> > +               func = hfi_do_disable;
> > +
> > +       for (i = 0; i < max_hfi_instances; i++) {
> > +               hfi_instance = &hfi_instances[i];
> > +               if (cpumask_empty(hfi_instance->cpus))
> > +                       continue;
> > +
> > +               cpu = cpumask_any(hfi_instance->cpus);
> > +               smp_call_function_single(cpu, func, hfi_instance, true);
> > +       }
> > +
> > +       mutex_unlock(&hfi_instance_lock);
> 
> So AFAICS, one instance can be enabled multiple times because of this.
>   I guess that's OK?  In any case, it would be kind of nice to leave a
> note regarding it somewhere here.

It writes the same values to the same registers, so I think this
should be fine. However, after your comment I started to think there
could perhaps be some side effect of writing the registers.
I'll double check (previously I verified that double enable works,
but only on MTL) or possibly rearrange the code so that it does not
enable an already enabled interface.
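
Roughly something like this in the enable path (a rough idea only;
"hw_enabled" is a hypothetical per-instance flag that does not exist in
the current code):

	if (!hfi_instance->hw_enabled) {
		hfi_set_hw_table(hfi_instance);
		hfi_enable();
		hfi_instance->hw_enabled = true;
	}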

> > +
> > +       return NOTIFY_OK;
> > +}
> > +
> > +static struct notifier_block hfi_thermal_nb = {
> > +       .notifier_call = hfi_thermal_notify,
> >  };
> >
> >  void __init intel_hfi_init(void)
> > @@ -628,10 +697,16 @@ void __init intel_hfi_init(void)
> >         if (!hfi_updates_wq)
> >                 goto err_nomem;
> >
> > +       if (thermal_genl_register_notifier(&hfi_thermal_nb))
> > +               goto err_nl_notif;
> 
> Is it possible for any clients to be there before the notifier is
> registered?  If not, it would be good to add a comment about it.

HFI is built-in, so it is started before any user space. I added a note about
that in the cover letter, but indeed it should be a comment in the code. Will fix.
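
Something along these lines in intel_hfi_init(), I think:

	/*
	 * HFI is built-in, so this code runs before any user space does.
	 * There can be no thermal netlink clients yet when the notifier
	 * is registered.
	 */
	if (thermal_genl_register_notifier(&hfi_thermal_nb))
		goto err_nl_notif;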

Regards
Stanislaw

Patch

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 3b04c6ec4fca..5e1e2b5269b7 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -159,6 +159,7 @@  struct hfi_cpu_info {
 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
 
 static int max_hfi_instances;
+static int hfi_thermal_clients_num;
 static struct hfi_instance *hfi_instances;
 
 static struct hfi_features hfi_features;
@@ -477,8 +478,11 @@  void intel_hfi_online(unsigned int cpu)
 enable:
 	cpumask_set_cpu(cpu, hfi_instance->cpus);
 
-	/* Enable this HFI instance if this is its first online CPU. */
-	if (cpumask_weight(hfi_instance->cpus) == 1) {
+	/*
+	 * Enable this HFI instance if this is its first online CPU and
+	 * there are user-space clients of thermal events.
+	 */
+	if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
 		hfi_set_hw_table(hfi_instance);
 		hfi_enable();
 	}
@@ -573,28 +577,93 @@  static __init int hfi_parse_features(void)
 	return 0;
 }
 
-static void hfi_do_enable(void)
+/*
+ * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
+ * callbacks or under protection of hfi_instance_lock.
+ */
+static void hfi_do_enable(void *ptr)
+{
+	struct hfi_instance *hfi_instance = ptr;
+
+	hfi_set_hw_table(hfi_instance);
+	hfi_enable();
+}
+
+static void hfi_do_disable(void *ptr)
+{
+	hfi_disable();
+}
+
+static void hfi_syscore_resume(void)
 {
 	/* This code runs only on the boot CPU. */
 	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
 	struct hfi_instance *hfi_instance = info->hfi_instance;
 
-	/* No locking needed. There is no concurrency with CPU online. */
-	hfi_set_hw_table(hfi_instance);
-	hfi_enable();
+	if (hfi_thermal_clients_num > 0)
+		hfi_do_enable(hfi_instance);
 }
 
-static int hfi_do_disable(void)
+static int hfi_syscore_suspend(void)
 {
-	/* No locking needed. There is no concurrency with CPU offline. */
 	hfi_disable();
 
 	return 0;
 }
 
 static struct syscore_ops hfi_pm_ops = {
-	.resume = hfi_do_enable,
-	.suspend = hfi_do_disable,
+	.resume = hfi_syscore_resume,
+	.suspend = hfi_syscore_suspend,
+};
+
+static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
+			      void *_notify)
+{
+	struct thermal_genl_notify *notify = _notify;
+	struct hfi_instance *hfi_instance;
+	smp_call_func_t func;
+	unsigned int cpu;
+	int i;
+
+	if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
+		return NOTIFY_DONE;
+
+	if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
+		return NOTIFY_DONE;
+
+	mutex_lock(&hfi_instance_lock);
+
+	switch (state) {
+	case THERMAL_NOTIFY_BIND:
+		hfi_thermal_clients_num++;
+		break;
+
+	case THERMAL_NOTIFY_UNBIND:
+		hfi_thermal_clients_num--;
+		break;
+	}
+
+	if (hfi_thermal_clients_num > 0)
+		func = hfi_do_enable;
+	else
+		func = hfi_do_disable;
+
+	for (i = 0; i < max_hfi_instances; i++) {
+		hfi_instance = &hfi_instances[i];
+		if (cpumask_empty(hfi_instance->cpus))
+			continue;
+
+		cpu = cpumask_any(hfi_instance->cpus);
+		smp_call_function_single(cpu, func, hfi_instance, true);
+	}
+
+	mutex_unlock(&hfi_instance_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hfi_thermal_nb = {
+	.notifier_call = hfi_thermal_notify,
 };
 
 void __init intel_hfi_init(void)
@@ -628,10 +697,16 @@  void __init intel_hfi_init(void)
 	if (!hfi_updates_wq)
 		goto err_nomem;
 
+	if (thermal_genl_register_notifier(&hfi_thermal_nb))
+		goto err_nl_notif;
+
 	register_syscore_ops(&hfi_pm_ops);
 
 	return;
 
+err_nl_notif:
+	destroy_workqueue(hfi_updates_wq);
+
 err_nomem:
 	for (j = 0; j < i; ++j) {
 		hfi_instance = &hfi_instances[j];