[v5,2/6] powercap/drivers/dtpm: Add hierarchy creation

Message ID 20211218130014.4037640-3-daniel.lezcano@linaro.org (mailing list archive)
State New, archived
Delegated to: Daniel Lezcano
Series powercap/drivers/dtpm: Create the dtpm hierarchy

Commit Message

Daniel Lezcano Dec. 18, 2021, 1 p.m. UTC
The DTPM framework is available but without a way to configure it.

This change provides a way to create a hierarchy of DTPM nodes, where
a node's power consumption reflects the sum of its children's power
consumption.

It is up to the platform to specify an array of dtpm nodes, where each
element has a pointer to its parent, except the topmost one. The type
of the node indicates which initialization callback to call. At this
time, we can create a virtual node, whose sole purpose is to act as a
parent in the hierarchy, and a DT node, whose name describes its
device tree path.

In order to ensure proper self-encapsulation, the DTPM table
descriptors contain a pair of initialization functions: one to set up
the DTPM backend and one to initialize it. With this approach, the
DTPM framework has very little material to export.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/powercap/Kconfig    |   1 +
 drivers/powercap/dtpm.c     | 155 ++++++++++++++++++++++++++++++++++--
 drivers/powercap/dtpm_cpu.c |   2 +-
 include/linux/dtpm.h        |  21 ++++-
 4 files changed, 171 insertions(+), 8 deletions(-)

Comments

Ulf Hansson Dec. 31, 2021, 1:45 p.m. UTC | #1
On Sat, 18 Dec 2021 at 14:00, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> The DTPM framework is available but without a way to configure it.
>
> This change provides a way to create a hierarchy of DTPM nodes, where
> a node's power consumption reflects the sum of its children's power
> consumption.
>
> It is up to the platform to specify an array of dtpm nodes, where each
> element has a pointer to its parent, except the topmost one. The type
> of the node indicates which initialization callback to call. At this
> time, we can create a virtual node, whose sole purpose is to act as a
> parent in the hierarchy, and a DT node, whose name describes its
> device tree path.
>
> In order to ensure proper self-encapsulation, the DTPM table
> descriptors contain a pair of initialization functions: one to set up
> the DTPM backend and one to initialize it. With this approach, the
> DTPM framework has very little material to export.
>
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>  drivers/powercap/Kconfig    |   1 +
>  drivers/powercap/dtpm.c     | 155 ++++++++++++++++++++++++++++++++++--
>  drivers/powercap/dtpm_cpu.c |   2 +-
>  include/linux/dtpm.h        |  21 ++++-
>  4 files changed, 171 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
> index 8242e8c5ed77..b1ca339957e3 100644
> --- a/drivers/powercap/Kconfig
> +++ b/drivers/powercap/Kconfig
> @@ -46,6 +46,7 @@ config IDLE_INJECT
>
>  config DTPM
>         bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
> +       depends on OF
>         help
>           This enables support for the power capping for the dynamic
>           thermal power management userspace engine.
> diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
> index 0fe70687c198..1611c86de5f5 100644
> --- a/drivers/powercap/dtpm.c
> +++ b/drivers/powercap/dtpm.c
> @@ -23,6 +23,7 @@
>  #include <linux/powercap.h>
>  #include <linux/slab.h>
>  #include <linux/mutex.h>
> +#include <linux/of.h>
>
>  #define DTPM_POWER_LIMIT_FLAG 0
>
> @@ -461,19 +462,163 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>         return 0;
>  }
>
> -static int __init init_dtpm(void)
> +static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy,
> +                                      struct dtpm *parent)
> +{
> +       struct dtpm *dtpm;
> +       int ret;
> +
> +       dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
> +       if (!dtpm)
> +               return ERR_PTR(-ENOMEM);
> +       dtpm_init(dtpm, NULL);
> +
> +       ret = dtpm_register(hierarchy->name, dtpm, parent);
> +       if (ret) {
> +               pr_err("Failed to register dtpm node '%s': %d\n",
> +                      hierarchy->name, ret);
> +               kfree(dtpm);
> +               return ERR_PTR(ret);
> +       }
> +
> +       return dtpm;
> +}
> +
> +static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
> +                                 struct dtpm *parent)
> +{
> +       struct dtpm_descr *dtpm_descr;
> +       struct device_node *np;
> +       int ret;
> +
> +       np = of_find_node_by_path(hierarchy->name);
> +       if (!np) {
> +               pr_err("Failed to find '%s'\n", hierarchy->name);
> +               return ERR_PTR(-ENXIO);
> +       }
> +
> +       for_each_dtpm_table(dtpm_descr) {
> +
> +               ret = dtpm_descr->setup(parent, np);

This will unconditionally call the ->setup() callback for each dtpm
desc in the dtpm table. At this point the ->setup() callback has not
been assigned by anyone that uses DTPM_DECLARE(), so if this were
called, it would trigger a NULL pointer dereference.
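
If we keep the patches separate, a check at the top of the loop would
avoid that; i.e. something like this (untested):

	for_each_dtpm_table(dtpm_descr) {

		/* Skip entries that did not register a ->setup() */
		if (!dtpm_descr->setup)
			continue;

		ret = dtpm_descr->setup(parent, np);
		if (ret) {
			pr_err("Failed to setup '%s': %d\n",
			       hierarchy->name, ret);
			of_node_put(np);
			return ERR_PTR(ret);
		}
	}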

On the other hand, we don't have someone calling
dtpm_create_hierarchy() yet, so this code doesn't get exercised, but
it still looks a bit odd to me. Maybe squashing patch 2 and patch 3 is
an option?

> +               if (ret) {
> +                       pr_err("Failed to setup '%s': %d\n", hierarchy->name, ret);
> +                       of_node_put(np);
> +                       return ERR_PTR(ret);
> +               }
> +
> +               of_node_put(np);

This will be called on every iteration of the loop over the dtpm
table. This is wrong; you only want to call it once, outside the loop.
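
I.e. keep the put in the error path, but move the success-path put
after the loop; something like this (untested):

	for_each_dtpm_table(dtpm_descr) {

		ret = dtpm_descr->setup(parent, np);
		if (ret) {
			pr_err("Failed to setup '%s': %d\n",
			       hierarchy->name, ret);
			of_node_put(np);
			return ERR_PTR(ret);
		}
	}

	of_node_put(np);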

> +       }
> +
> +       /*
> +        * By returning a NULL pointer, we let know the caller there
> +        * is no child for us as we are a leaf of the tree
> +        */
> +       return NULL;
> +}
> +
> +typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
> +
> +dtpm_node_callback_t dtpm_node_callback[] = {
> +       [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
> +       [DTPM_NODE_DT] = dtpm_setup_dt,
> +};
> +
> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
> +                              const struct dtpm_node *it, struct dtpm *parent)
> +{
> +       struct dtpm *dtpm;
> +       int i, ret;
> +
> +       for (i = 0; hierarchy[i].name; i++) {
> +
> +               if (hierarchy[i].parent != it)
> +                       continue;
> +
> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
> +               if (!dtpm || IS_ERR(dtpm))
> +                       continue;
> +
> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);

Why do you need to recursively call dtpm_for_each_child() here?

Is there a restriction on how the dtpm core code manages adding
children/parents?

> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
> +/**
> + * dtpm_create_hierarchy - Create the dtpm hierarchy
> + * @hierarchy: An array of struct dtpm_node describing the hierarchy
> + *
> + * The function is called by the platform specific code with the
> + * description of the different node in the hierarchy. It creates the
> + * tree in the sysfs filesystem under the powercap dtpm entry.
> + *
> + * The expected tree has the format:
> + *
> + * struct dtpm_node hierarchy[] = {
> + *     [0] { .name = "topmost" },

For clarity, I think we should also specify DTPM_NODE_VIRTUAL here.

> + *      [1] { .name = "package", .parent = &hierarchy[0] },

Ditto.
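
I.e., DTPM_NODE_VIRTUAL is already the implicit default (it is zero),
so this is only about making the example self-documenting:

 *	[0] { .name = "topmost", .type = DTPM_NODE_VIRTUAL },
 *	[1] { .name = "package", .type = DTPM_NODE_VIRTUAL,
 *	      .parent = &hierarchy[0] },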

> + *      [2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
> + *      [3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
> + *      [4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
> + *      [5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
> + *     [6] { }
> + * };
> + *
> + * The last element is always an empty one and marks the end of the
> + * array.
> + *
> + * Return: zero on success, a negative value in case of error. Errors
> + * are reported back from the underlying functions.
> + */
> +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
>  {
> +       const struct of_device_id *match;
> +       const struct dtpm_node *hierarchy;
>         struct dtpm_descr *dtpm_descr;
> +       struct device_node *np;
> +       int ret;
> +
> +       np = of_find_node_by_path("/");
> +       if (!np)
> +               return -ENODEV;
> +
> +       match = of_match_node(dtpm_match_table, np);
>
> +       of_node_put(np);
> +
> +       if (!match)
> +               return -ENODEV;
> +
> +       hierarchy = match->data;
> +       if (!hierarchy)
> +               return -EFAULT;
> +
> +       ret = dtpm_for_each_child(hierarchy, NULL, NULL);
> +       if (ret)
> +               return ret;
> +
> +       for_each_dtpm_table(dtpm_descr) {
> +
> +               if (!dtpm_descr->init)
> +                       continue;
> +
> +               dtpm_descr->init();
> +       }
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
> +
> +static int __init init_dtpm(void)
> +{
>         pct = powercap_register_control_type(NULL, "dtpm", NULL);
>         if (IS_ERR(pct)) {
>                 pr_err("Failed to register control type\n");
>                 return PTR_ERR(pct);
>         }

It looks like powercap_register_control_type() should be able to be
called from dtpm_create_hierarchy(). In this way we can simply drop
the initcall below, altogether.

Of course, that assumes that dtpm_create_hierarchy() is being called
from a regular module_platform_driver() path - or at least from a
later initcall than fs_initcall(), which is when the "powercap_class"
is being registered. But that sounds like a reasonable assumption we
should be able to make, no?
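
As a sketch (untested), with a guard so a second call doesn't try to
register the control type twice:

	int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
	{
		...
		if (!pct) {
			pct = powercap_register_control_type(NULL, "dtpm", NULL);
			if (IS_ERR(pct)) {
				pr_err("Failed to register control type\n");
				ret = PTR_ERR(pct);
				pct = NULL;
				return ret;
			}
		}
		...
	}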

>
> -       for_each_dtpm_table(dtpm_descr)
> -               dtpm_descr->init();
> -
>         return 0;
>  }
> -late_initcall(init_dtpm);
> +fs_initcall_sync(init_dtpm);
> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
> index b740866b228d..6bffb44c75aa 100644
> --- a/drivers/powercap/dtpm_cpu.c
> +++ b/drivers/powercap/dtpm_cpu.c
> @@ -269,4 +269,4 @@ static int __init dtpm_cpu_init(void)
>         return 0;
>  }
>
> -DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
> +DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init, NULL);
> diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
> index d37e5d06a357..5a6b31eaf7e4 100644
> --- a/include/linux/dtpm.h
> +++ b/include/linux/dtpm.h
> @@ -32,23 +32,39 @@ struct dtpm_ops {
>         void (*release)(struct dtpm *);
>  };
>
> +struct device_node;
> +
>  typedef int (*dtpm_init_t)(void);
> +typedef int (*dtpm_setup_t)(struct dtpm *, struct device_node *);
>
>  struct dtpm_descr {
>         dtpm_init_t init;
> +       dtpm_setup_t setup;
> +};
> +
> +enum DTPM_NODE_TYPE {
> +       DTPM_NODE_VIRTUAL = 0,
> +       DTPM_NODE_DT,
> +};
> +
> +struct dtpm_node {
> +       enum DTPM_NODE_TYPE type;
> +       const char *name;
> +       struct dtpm_node *parent;
>  };
>
>  /* Init section thermal table */
>  extern struct dtpm_descr __dtpm_table[];
>  extern struct dtpm_descr __dtpm_table_end[];
>
> -#define DTPM_TABLE_ENTRY(name, __init)                         \
> +#define DTPM_TABLE_ENTRY(name, __init, __setup)                        \
>         static struct dtpm_descr __dtpm_table_entry_##name      \
>         __used __section("__dtpm_table") = {                    \
>                 .init = __init,                                 \
> +               .setup = __setup,                               \
>         }
>
> -#define DTPM_DECLARE(name, init)       DTPM_TABLE_ENTRY(name, init)
> +#define DTPM_DECLARE(name, init, setup)        DTPM_TABLE_ENTRY(name, init, setup)
>
>  #define for_each_dtpm_table(__dtpm)    \
>         for (__dtpm = __dtpm_table;     \
> @@ -70,4 +86,5 @@ void dtpm_unregister(struct dtpm *dtpm);
>
>  int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
>
> +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);

To start simple, I think dtpm_create_hierarchy() is a sufficient
interface to add at this point.

However, it's quite likely that it's going to be called from a regular
module (SoC specific platform driver), which means it needs to manage
->remove() operations too. Anyway, I am fine if we look into that as
improvements on top of the $subject series.
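
For reference, I would expect a user of this interface to look roughly
like this (hypothetical names, untested):

	static struct dtpm_node soc_hierarchy[] = {
		[0] = { .name = "soc", .type = DTPM_NODE_VIRTUAL },
		[1] = { .name = "/cpus/cpu0", .type = DTPM_NODE_DT,
			.parent = &soc_hierarchy[0] },
		[2] = { },
	};

	static struct of_device_id soc_dtpm_match_table[] = {
		{ .compatible = "vendor,soc", .data = soc_hierarchy },
		{ },
	};

	/* e.g. from a platform driver's probe or a module init path */
	ret = dtpm_create_hierarchy(soc_dtpm_match_table);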

>  #endif
> --
> 2.25.1
>

Kind regards
Uffe
Daniel Lezcano Jan. 5, 2022, 4 p.m. UTC | #2
On 31/12/2021 14:45, Ulf Hansson wrote:

[ ... ]

>> +static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
>> +                                 struct dtpm *parent)
>> +{
>> +       struct dtpm_descr *dtpm_descr;
>> +       struct device_node *np;
>> +       int ret;
>> +
>> +       np = of_find_node_by_path(hierarchy->name);
>> +       if (!np) {
>> +               pr_err("Failed to find '%s'\n", hierarchy->name);
>> +               return ERR_PTR(-ENXIO);
>> +       }
>> +
>> +       for_each_dtpm_table(dtpm_descr) {
>> +
>> +               ret = dtpm_descr->setup(parent, np);
> 
> This will unconditionally call the ->setup() callback for each dtpm
> desc in the dtpm table. At this point the ->setup() callback has not
> been assigned by anyone that uses DTPM_DECLARE(), so if this were
> called, it would trigger a NULL pointer dereference.
> 
> On the other hand, we don't have someone calling
> dtpm_create_hierarchy() yet, so this code doesn't get exercised, but

Yes, that is the reason why the test is not here.

> it still looks a bit odd to me. Maybe squashing patch 2 and patch 3 is
> an option?
Sure

>> +               if (ret) {
>> +                       pr_err("Failed to setup '%s': %d\n", hierarchy->name, ret);
>> +                       of_node_put(np);
>> +                       return ERR_PTR(ret);
>> +               }
>> +
>> +               of_node_put(np);
> 
> This will be called on every iteration of the loop over the dtpm
> table. This is wrong; you only want to call it once, outside the loop.

Right, good catch

>> +       }
>> +
>> +       /*
>> +        * By returning a NULL pointer, we let know the caller there
>> +        * is no child for us as we are a leaf of the tree
>> +        */
>> +       return NULL;
>> +}
>> +
>> +typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
>> +
>> +dtpm_node_callback_t dtpm_node_callback[] = {
>> +       [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
>> +       [DTPM_NODE_DT] = dtpm_setup_dt,
>> +};
>> +
>> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
>> +                              const struct dtpm_node *it, struct dtpm *parent)
>> +{
>> +       struct dtpm *dtpm;
>> +       int i, ret;
>> +
>> +       for (i = 0; hierarchy[i].name; i++) {
>> +
>> +               if (hierarchy[i].parent != it)
>> +                       continue;
>> +
>> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
>> +               if (!dtpm || IS_ERR(dtpm))
>> +                       continue;
>> +
>> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
> 
> Why do you need to recursively call dtpm_for_each_child() here?
> 
> Is there a restriction on how the dtpm core code manages adding
> children/parents?

[ ... ]

The recursive call is needed because the tree is stored in a flat
array; the recursion is what connects each node with its parent.


>> + *
>> + * struct dtpm_node hierarchy[] = {
>> + *     [0] { .name = "topmost" },
> 
> For clarity, I think we should also specify DTPM_NODE_VIRTUAL here.
> 
>> + *      [1] { .name = "package", .parent = &hierarchy[0] },
> 
> Ditto.

Sure

[ ... ]

>> +static int __init init_dtpm(void)
>> +{
>>         pct = powercap_register_control_type(NULL, "dtpm", NULL);
>>         if (IS_ERR(pct)) {
>>                 pr_err("Failed to register control type\n");
>>                 return PTR_ERR(pct);
>>         }
> 
> It looks like powercap_register_control_type() should be able to be
> called from dtpm_create_hierarchy(). In this way we can simply drop
> the initcall below, altogether.
>
> Of course, that assumes that dtpm_create_hierarchy() is being called
> from a regular module_platform_driver() path - or at least from a
> later initcall than fs_initcall(), which is when the "powercap_class"
> is being registered. But that sounds like a reasonable assumption we
> should be able to make, no?

Yes, agree. Good suggestion, I will do the change.

[ ... ]

>>  int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
>>
>> +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);
> 
> To start simple, I think dtpm_create_hierarchy() is a sufficient
> interface to add at this point.
> 
> However, it's quite likely that it's going to be called from a regular
> module (SoC specific platform driver), which means it needs to manage
> ->remove() operations too. Anyway, I am fine if we look into that as
> improvements on top of the $subject series.

Yes, ATM, the modules cannot be unloaded, on purpose. The removal can
be added later.
Ulf Hansson Jan. 7, 2022, 3:54 p.m. UTC | #3
[...]

> >> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
> >> +                              const struct dtpm_node *it, struct dtpm *parent)
> >> +{
> >> +       struct dtpm *dtpm;
> >> +       int i, ret;
> >> +
> >> +       for (i = 0; hierarchy[i].name; i++) {
> >> +
> >> +               if (hierarchy[i].parent != it)
> >> +                       continue;
> >> +
> >> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
> >> +               if (!dtpm || IS_ERR(dtpm))
> >> +                       continue;
> >> +
> >> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
> >
> > Why do you need to recursively call dtpm_for_each_child() here?
> >
> > Is there a restriction on how the dtpm core code manages adding
> > children/parents?
>
> [ ... ]
>
> The recursive call is needed because the tree is stored in a flat
> array; the recursion is what connects each node with its parent.

Right, I believe I understand what you are trying to do here, but I am
not sure if this is the best approach to do this. Maybe it is.

The problem is that we are also allocating memory for a dtpm and we
call dtpm_register() on it in this execution path - and this memory
doesn't get freed up nor unregistered, if any of the later recursive
calls to dtpm_for_each_child() fails.

The point is, it looks like it can get rather messy with the recursive
calls to cope with the error path. Maybe it's easier to store the
allocated dtpms in a list somewhere and use this to also find a
reference of a parent?

Later on, when we may decide to implement "dtpm_destroy_hierarchy()"
(or whatever we would call such interface), you probably need a list
of the allocated dtpms anyway, don't you think?
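
Roughly like this (untested; 'created' would be a new list_head member
of struct dtpm):

	static LIST_HEAD(dtpm_created);

	/* in dtpm_setup_virtual(), after a successful dtpm_register() */
	list_add(&dtpm->created, &dtpm_created);

	/* one unwind point when a later node fails */
	list_for_each_entry_safe(dtpm, tmp, &dtpm_created, created) {
		list_del(&dtpm->created);
		dtpm_unregister(dtpm);
		kfree(dtpm);
	}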

[...]

Kind regards
Uffe
Daniel Lezcano Jan. 10, 2022, 3:55 p.m. UTC | #4
On 07/01/2022 16:54, Ulf Hansson wrote:
> [...]
> 
>>>> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
>>>> +                              const struct dtpm_node *it, struct dtpm *parent)
>>>> +{
>>>> +       struct dtpm *dtpm;
>>>> +       int i, ret;
>>>> +
>>>> +       for (i = 0; hierarchy[i].name; i++) {
>>>> +
>>>> +               if (hierarchy[i].parent != it)
>>>> +                       continue;
>>>> +
>>>> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
>>>> +               if (!dtpm || IS_ERR(dtpm))
>>>> +                       continue;
>>>> +
>>>> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
>>>
>>> Why do you need to recursively call dtpm_for_each_child() here?
>>>
>>> Is there a restriction on how the dtpm core code manages adding
>>> children/parents?
>>
>> [ ... ]
>>
>> The recursive call is needed because the tree is stored in a flat
>> array; the recursion is what connects each node with its parent.
> 
> Right, I believe I understand what you are trying to do here, but I am
> not sure if this is the best approach to do this. Maybe it is.
> 
> The problem is that we are also allocating memory for a dtpm and we
> call dtpm_register() on it in this execution path - and this memory
> doesn't get freed up nor unregistered, if any of the later recursive
> calls to dtpm_for_each_child() fails.
> 
> The point is, it looks like it can get rather messy with the recursive
> calls to cope with the error path. Maybe it's easier to store the
> allocated dtpms in a list somewhere and use this to also find a
> reference of a parent?

I think it is better to continue the construction with the other nodes
even if some of them fail to create; it should be a non-critical issue.
As an analogy, if one thermal zone fails to create, the other thermal
zones are not removed.

In addition, that should allow multiple node descriptions for different
DT setups for the same platform. That should fix the issue pointed out
by Bjorn.

> Later on, when we may decide to implement "dtpm_destroy_hierarchy()"
> (or whatever we would call such interface), you probably need a list
> of the allocated dtpms anyway, don't you think?

No, it is not necessary: the list of dtpms is the dtpm tree itself, and
it can be destroyed recursively.
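
Something like this (a sketch, assuming the children/sibling lists
that struct dtpm already carries):

	static void dtpm_destroy_subtree(struct dtpm *dtpm)
	{
		struct dtpm *child, *tmp;

		list_for_each_entry_safe(child, tmp, &dtpm->children, sibling)
			dtpm_destroy_subtree(child);

		dtpm_unregister(dtpm);
	}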
Ulf Hansson Jan. 11, 2022, 8:28 a.m. UTC | #5
On Mon, 10 Jan 2022 at 16:55, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 07/01/2022 16:54, Ulf Hansson wrote:
> > [...]
> >
> >>>> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
> >>>> +                              const struct dtpm_node *it, struct dtpm *parent)
> >>>> +{
> >>>> +       struct dtpm *dtpm;
> >>>> +       int i, ret;
> >>>> +
> >>>> +       for (i = 0; hierarchy[i].name; i++) {
> >>>> +
> >>>> +               if (hierarchy[i].parent != it)
> >>>> +                       continue;
> >>>> +
> >>>> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
> >>>> +               if (!dtpm || IS_ERR(dtpm))
> >>>> +                       continue;
> >>>> +
> >>>> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
> >>>
> >>> Why do you need to recursively call dtpm_for_each_child() here?
> >>>
> >>> Is there a restriction on how the dtpm core code manages adding
> >>> children/parents?
> >>
> >> [ ... ]
> >>
> >> The recursive call is needed because the tree is stored in a flat
> >> array; the recursion is what connects each node with its parent.
> >
> > Right, I believe I understand what you are trying to do here, but I am
> > not sure if this is the best approach to do this. Maybe it is.
> >
> > The problem is that we are also allocating memory for a dtpm and we
> > call dtpm_register() on it in this execution path - and this memory
> > doesn't get freed up nor unregistered, if any of the later recursive
> > calls to dtpm_for_each_child() fails.
> >
> > The point is, it looks like it can get rather messy with the recursive
> > calls to cope with the error path. Maybe it's easier to store the
> > allocated dtpms in a list somewhere and use this to also find a
> > reference of a parent?
>
> I think it is better to continue the construction with the other nodes
> even if some of them fail to create; it should be a non-critical issue.
> As an analogy, if one thermal zone fails to create, the other thermal
> zones are not removed.

Well, what if it fails because its "consumer part" is waiting for some
resource to become available?

Maybe the devfreq driver/subsystem isn't available yet and causes
-EPROBE_DEFER, for example. Perhaps this isn't the way the dtpm
registration works currently, but sure it's worth considering when
going forward, no?

In any case, papering over the error seems quite scary to me. I would
much prefer if we instead could propagate the error code correctly to
the caller of dtpm_create_hierarchy(), to allow it to retry if
necessary.

>
> In addition, that should allow multiple node descriptions for different
> DT setups for the same platform. That should fix the issue pointed out
> by Bjorn.
>
> > Later on, when we may decide to implement "dtpm_destroy_hierarchy()"
> > (or whatever we would call such interface), you probably need a list
> > of the allocated dtpms anyway, don't you think?
>
> No, it is not necessary: the list of dtpms is the dtpm tree itself, and
> it can be destroyed recursively.

I couldn't quite figure out how that should work, but I assume
that is what the ->release() callback in struct dtpm_ops is there
to help with, in some way.

Kind regards
Uffe
Daniel Lezcano Jan. 11, 2022, 5:52 p.m. UTC | #6
On 11/01/2022 09:28, Ulf Hansson wrote:
> On Mon, 10 Jan 2022 at 16:55, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 07/01/2022 16:54, Ulf Hansson wrote:
>>> [...]
>>>
>>>>>> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
>>>>>> +                              const struct dtpm_node *it, struct dtpm *parent)
>>>>>> +{
>>>>>> +       struct dtpm *dtpm;
>>>>>> +       int i, ret;
>>>>>> +
>>>>>> +       for (i = 0; hierarchy[i].name; i++) {
>>>>>> +
>>>>>> +               if (hierarchy[i].parent != it)
>>>>>> +                       continue;
>>>>>> +
>>>>>> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
>>>>>> +               if (!dtpm || IS_ERR(dtpm))
>>>>>> +                       continue;
>>>>>> +
>>>>>> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
>>>>>
>>>>> Why do you need to recursively call dtpm_for_each_child() here?
>>>>>
>>>>> Is there a restriction on how the dtpm core code manages adding
>>>>> children/parents?
>>>>
>>>> [ ... ]
>>>>
>>>> The recursive call is needed because the tree is stored in a flat
>>>> array; the recursion is what connects each node with its parent.
>>>
>>> Right, I believe I understand what you are trying to do here, but I am
>>> not sure if this is the best approach to do this. Maybe it is.
>>>
>>> The problem is that we are also allocating memory for a dtpm and we
>>> call dtpm_register() on it in this execution path - and this memory
>>> doesn't get freed up nor unregistered, if any of the later recursive
>>> calls to dtpm_for_each_child() fails.
>>>
>>> The point is, it looks like it can get rather messy with the recursive
>>> calls to cope with the error path. Maybe it's easier to store the
>>> allocated dtpms in a list somewhere and use this to also find a
>>> reference of a parent?
>>
>> I think it is better to continue the construction with the other nodes
>> even if some of them fail to create; it should be a non-critical issue.
>> As an analogy, if one thermal zone fails to create, the other thermal
>> zones are not removed.
> 
> Well, what if it fails because its "consumer part" is waiting for some
> resource to become available?
> 
> Maybe the devfreq driver/subsystem isn't available yet and causes
> -EPROBE_DEFER, for example. Perhaps this isn't the way the dtpm
> registration works currently, but sure it's worth considering when
> going forward, no?

It should be solved by the fact that the DTPM description is a module
and is loaded after the system has booted. The module loading order is
handled by userspace.

I agree, we could improve that but it is way too complex to be addressed
in a single series and should be part of a specific change IMO.

> In any case, papering over the error seems quite scary to me. I would
> much prefer if we instead could propagate the error code correctly to
> the caller of dtpm_create_hierarchy(), to allow it to retry if
> necessary.

It is really something we should be able to address later.

>> In addition, that should allow multiple node descriptions for different
>> DT setups for the same platform. That should fix the issue pointed out
>> by Bjorn.
>>
>>> Later on, when we may decide to implement "dtpm_destroy_hierarchy()"
>>> (or whatever we would call such interface), you probably need a list
>>> of the allocated dtpms anyway, don't you think?
>>
>> No, it is not necessary: the list of dtpms is the dtpm tree itself, and
>> it can be destroyed recursively.
> 
> I couldn't quite figure out how that should work, but I assume
> that is what the ->release() callback in struct dtpm_ops is there
> to help with, in some way.
> 
> Kind regards
> Uffe
>
Ulf Hansson Jan. 12, 2022, noon UTC | #7
On Tue, 11 Jan 2022 at 18:52, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 11/01/2022 09:28, Ulf Hansson wrote:
> > On Mon, 10 Jan 2022 at 16:55, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> >>
> >> On 07/01/2022 16:54, Ulf Hansson wrote:
> >>> [...]
> >>>
> >>>>>> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
> >>>>>> +                              const struct dtpm_node *it, struct dtpm *parent)
> >>>>>> +{
> >>>>>> +       struct dtpm *dtpm;
> >>>>>> +       int i, ret;
> >>>>>> +
> >>>>>> +       for (i = 0; hierarchy[i].name; i++) {
> >>>>>> +
> >>>>>> +               if (hierarchy[i].parent != it)
> >>>>>> +                       continue;
> >>>>>> +
> >>>>>> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
> >>>>>> +               if (!dtpm || IS_ERR(dtpm))
> >>>>>> +                       continue;
> >>>>>> +
> >>>>>> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
> >>>>>
> >>>>> Why do you need to recursively call dtpm_for_each_child() here?
> >>>>>
> >>>>> Is there a restriction on how the dtpm core code manages adding
> >>>>> children/parents?
> >>>>
> >>>> [ ... ]
> >>>>
> >>>> The recursive call is needed because the tree is stored in a flat
> >>>> array; the recursion is what connects each node with its parent.
> >>>
> >>> Right, I believe I understand what you are trying to do here, but I am
> >>> not sure if this is the best approach to do this. Maybe it is.
> >>>
> >>> The problem is that we are also allocating memory for a dtpm and we
> >>> call dtpm_register() on it in this execution path - and this memory
> >>> doesn't get freed up nor unregistered, if any of the later recursive
> >>> calls to dtpm_for_each_child() fails.
> >>>
> >>> The point is, it looks like it can get rather messy with the recursive
> >>> calls to cope with the error path. Maybe it's easier to store the
> >>> allocated dtpms in a list somewhere and use this to also find a
> >>> reference of a parent?
> >>
> >> I think it is better to continue the construction with the other nodes
> >> even if some of them fail to create; it should be a non-critical issue.
> >> As an analogy, if one thermal zone fails to create, the other thermal
> >> zones are not removed.
> >
> > Well, what if it fails because its "consumer part" is waiting for some
> > resource to become available?
> >
> > Maybe the devfreq driver/subsystem isn't available yet and causes
> > -EPROBE_DEFER, for example. Perhaps this isn't the way the dtpm
> > registration works currently, but sure it's worth considering when
> > going forward, no?
>
> It should be solved by the fact that the DTPM description is a module
> and is loaded after the system has booted. The module loading order is
> handled by userspace.

Ideally, yes. However, drivers/subsystems in the kernel should respect
-EPROBE_DEFER. It's good practice to do that.

>
> I agree, we could improve that but it is way too complex to be addressed
> in a single series and should be part of a specific change IMO.

It's not my call to make, but I don't agree, sorry.

In my opinion, plain error handling to avoid leaking memory isn't
something that should be addressed later. At least if the problems are
already spotted during review.

>
> > In any case, papering over the error seems quite scary to me. I would
> > much prefer if we instead could propagate the error code correctly to
> > the caller of dtpm_create_hierarchy(), to allow it to retry if
> > necessary.
>
> It is really something we should be able to address later.
>

[...]

Kind regards
Uffe
Daniel Lezcano Jan. 14, 2022, 7:15 p.m. UTC | #8
On 12/01/2022 13:00, Ulf Hansson wrote:
> On Tue, 11 Jan 2022 at 18:52, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 11/01/2022 09:28, Ulf Hansson wrote:
>>> On Mon, 10 Jan 2022 at 16:55, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>>>
>>>> On 07/01/2022 16:54, Ulf Hansson wrote:
>>>>> [...]
>>>>>
>>>>>>>> +static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
>>>>>>>> +                              const struct dtpm_node *it, struct dtpm *parent)
>>>>>>>> +{
>>>>>>>> +       struct dtpm *dtpm;
>>>>>>>> +       int i, ret;
>>>>>>>> +
>>>>>>>> +       for (i = 0; hierarchy[i].name; i++) {
>>>>>>>> +
>>>>>>>> +               if (hierarchy[i].parent != it)
>>>>>>>> +                       continue;
>>>>>>>> +
>>>>>>>> +               dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
>>>>>>>> +               if (!dtpm || IS_ERR(dtpm))
>>>>>>>> +                       continue;
>>>>>>>> +
>>>>>>>> +               ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
>>>>>>>
>>>>>>> Why do you need to recursively call dtpm_for_each_child() here?
>>>>>>>
>>>>>>> Is there a restriction on how the dtpm core code manages adding
>>>>>>> children/parents?
>>>>>>
>>>>>> [ ... ]
>>>>>>
>>>>>> The recursive call is needed because the tree is stored in a flat
>>>>>> array; the recursion is what connects each node with its parent.
>>>>>
>>>>> Right, I believe I understand what you are trying to do here, but I am
>>>>> not sure if this is the best approach to do this. Maybe it is.
>>>>>
>>>>> The problem is that we are also allocating memory for a dtpm and we
>>>>> call dtpm_register() on it in this execution path - and this memory
>>>>> doesn't get freed up nor unregistered, if any of the later recursive
>>>>> calls to dtpm_for_each_child() fails.
>>>>>
>>>>> The point is, it looks like it can get rather messy with the recursive
>>>>> calls to cope with the error path. Maybe it's easier to store the
>>>>> allocated dtpms in a list somewhere and use this to also find a
>>>>> reference of a parent?
>>>>
>>>> I think it is better to continue the construction with the other nodes
>>>> even if some of them fail to create; it should be a non-critical issue.
>>>> As an analogy, if one thermal zone fails to create, the other thermal
>>>> zones are not removed.
>>>
>>> Well, what if it fails because its "consumer part" is waiting for some
>>> resource to become available?
>>>
>>> Maybe the devfreq driver/subsystem isn't available yet and causes
>>> -EPROBE_DEFER, for example. Perhaps this isn't the way the dtpm
>>> registration works currently, but sure it's worth considering when
>>> going forward, no?
>>
>> It should be solved by the fact that the DTPM description is a module
>> and is loaded after the system has booted. The module loading order is
>> handled by userspace.
> 
> Ideally, yes. However, drivers/subsystems in the kernel should respect
> -EPROBE_DEFER. It's good practice to do that.

Certainly.

However, it does not make sense because dtpm is not a device and I don't
see a device returning EPROBE_DEFER right now.

Wanting to handle EPROBE_DEFER would make the code a gas factory (i.e.
needlessly complex):
 - shall we destroy the hierarchy each time a device returns
-EPROBE_DEFER?
     * yes: then we need to recreate it every time we retry, and we
end up with an empty tree in case of error
     * no: we have to keep track of what was created or not, in order
to attach the newly probed device to the tree with the right parent,
etc.

So that is an incredible amount of complexity for, in practice, no
device returning -EPROBE_DEFER.

In addition, let's imagine one of the components, like cpufreq, is a
module: no EPROBE_DEFER handling will prevent the description from
being created before the cpufreq driver is loaded.

But... I agree the hierarchy creation function should be called after
all the devices have been created. For that, I think the kernel already
provides what is needed:

1. We compile the SoC specific dtpm always as a module

	depends on ... && m

2. In the module we add the dependencies to other modules

	MODULE_SOFTDEP(post: panfrost)

And with that, all dependencies are explicitly described and the
hierarchy creation is safe.
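
Concretely, the SoC-specific description module would end with
something like (a sketch, hypothetical names):

	static int __init soc_dtpm_init(void)
	{
		return dtpm_create_hierarchy(soc_dtpm_match_table);
	}
	module_init(soc_dtpm_init);

	MODULE_SOFTDEP("post: panfrost");
	MODULE_LICENSE("GPL");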

Does it make sense?

Patch

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 8242e8c5ed77..b1ca339957e3 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -46,6 +46,7 @@  config IDLE_INJECT
 
 config DTPM
 	bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
+	depends on OF
 	help
 	  This enables support for the power capping for the dynamic
 	  thermal power management userspace engine.
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 0fe70687c198..1611c86de5f5 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -23,6 +23,7 @@ 
 #include <linux/powercap.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/of.h>
 
 #define DTPM_POWER_LIMIT_FLAG 0
 
@@ -461,19 +462,163 @@  int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	return 0;
 }
 
-static int __init init_dtpm(void)
+static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy,
+				       struct dtpm *parent)
+{
+	struct dtpm *dtpm;
+	int ret;
+
+	dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
+	if (!dtpm)
+		return ERR_PTR(-ENOMEM);
+	dtpm_init(dtpm, NULL);
+
+	ret = dtpm_register(hierarchy->name, dtpm, parent);
+	if (ret) {
+		pr_err("Failed to register dtpm node '%s': %d\n",
+		       hierarchy->name, ret);
+		kfree(dtpm);
+		return ERR_PTR(ret);
+	}
+
+	return dtpm;
+}
+
+static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
+				  struct dtpm *parent)
+{
+	struct dtpm_descr *dtpm_descr;
+	struct device_node *np;
+	int ret;
+
+	np = of_find_node_by_path(hierarchy->name);
+	if (!np) {
+		pr_err("Failed to find '%s'\n", hierarchy->name);
+		return ERR_PTR(-ENXIO);
+	}
+
+	for_each_dtpm_table(dtpm_descr) {
+
+		ret = dtpm_descr->setup(parent, np);
+		if (ret) {
+			pr_err("Failed to setup '%s': %d\n", hierarchy->name, ret);
+			of_node_put(np);
+			return ERR_PTR(ret);
+		}
+
+		of_node_put(np);
+	}
+
+	/*
+	 * By returning a NULL pointer, we let know the caller there
+	 * is no child for us as we are a leaf of the tree
+	 */
+	return NULL;
+}
+
+typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
+
+dtpm_node_callback_t dtpm_node_callback[] = {
+	[DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
+	[DTPM_NODE_DT] = dtpm_setup_dt,
+};
+
+static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
+			       const struct dtpm_node *it, struct dtpm *parent)
+{
+	struct dtpm *dtpm;
+	int i, ret;
+
+	for (i = 0; hierarchy[i].name; i++) {
+
+		if (hierarchy[i].parent != it)
+			continue;
+
+		dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
+		if (!dtpm || IS_ERR(dtpm))
+			continue;
+
+		ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * dtpm_create_hierarchy - Create the dtpm hierarchy
+ * @hierarchy: An array of struct dtpm_node describing the hierarchy
+ *
+ * The function is called by the platform specific code with the
+ * description of the different node in the hierarchy. It creates the
+ * tree in the sysfs filesystem under the powercap dtpm entry.
+ *
+ * The expected tree has the format:
+ *
+ * struct dtpm_node hierarchy[] = {
+ *	[0] { .name = "topmost" },
+ *      [1] { .name = "package", .parent = &hierarchy[0] },
+ *      [2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *      [3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *      [4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *      [5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[6] { }
+ * };
+ *
+ * The last element is always an empty one and marks the end of the
+ * array.
+ *
+ * Return: zero on success, a negative value in case of error. Errors
+ * are reported back from the underlying functions.
+ */
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
 {
+	const struct of_device_id *match;
+	const struct dtpm_node *hierarchy;
 	struct dtpm_descr *dtpm_descr;
+	struct device_node *np;
+	int ret;
+
+	np = of_find_node_by_path("/");
+	if (!np)
+		return -ENODEV;
+
+	match = of_match_node(dtpm_match_table, np);
 
+	of_node_put(np);
+
+	if (!match)
+		return -ENODEV;
+
+	hierarchy = match->data;
+	if (!hierarchy)
+		return -EFAULT;
+
+	ret = dtpm_for_each_child(hierarchy, NULL, NULL);
+	if (ret)
+		return ret;
+	
+	for_each_dtpm_table(dtpm_descr) {
+
+		if (!dtpm_descr->init)
+			continue;
+
+		dtpm_descr->init();
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
+
+static int __init init_dtpm(void)
+{
 	pct = powercap_register_control_type(NULL, "dtpm", NULL);
 	if (IS_ERR(pct)) {
 		pr_err("Failed to register control type\n");
 		return PTR_ERR(pct);
 	}
 
-	for_each_dtpm_table(dtpm_descr)
-		dtpm_descr->init();
-
 	return 0;
 }
-late_initcall(init_dtpm);
+fs_initcall_sync(init_dtpm);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index b740866b228d..6bffb44c75aa 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -269,4 +269,4 @@  static int __init dtpm_cpu_init(void)
 	return 0;
 }
 
-DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
+DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init, NULL);
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index d37e5d06a357..5a6b31eaf7e4 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -32,23 +32,39 @@  struct dtpm_ops {
 	void (*release)(struct dtpm *);
 };
 
+struct device_node;
+
 typedef int (*dtpm_init_t)(void);
+typedef int (*dtpm_setup_t)(struct dtpm *, struct device_node *);
 
 struct dtpm_descr {
 	dtpm_init_t init;
+	dtpm_setup_t setup;
+};
+
+enum DTPM_NODE_TYPE {
+	DTPM_NODE_VIRTUAL = 0,
+	DTPM_NODE_DT,
+};
+
+struct dtpm_node {
+	enum DTPM_NODE_TYPE type;
+	const char *name;
+	struct dtpm_node *parent;
 };
 
 /* Init section thermal table */
 extern struct dtpm_descr __dtpm_table[];
 extern struct dtpm_descr __dtpm_table_end[];
 
-#define DTPM_TABLE_ENTRY(name, __init)				\
+#define DTPM_TABLE_ENTRY(name, __init, __setup)			\
 	static struct dtpm_descr __dtpm_table_entry_##name	\
 	__used __section("__dtpm_table") = {			\
 		.init = __init,					\
+		.setup = __setup,				\
 	}
 
-#define DTPM_DECLARE(name, init)	DTPM_TABLE_ENTRY(name, init)
+#define DTPM_DECLARE(name, init, setup)	DTPM_TABLE_ENTRY(name, init, setup)
 
 #define for_each_dtpm_table(__dtpm)	\
 	for (__dtpm = __dtpm_table;	\
@@ -70,4 +86,5 @@  void dtpm_unregister(struct dtpm *dtpm);
 
 int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);
 #endif