diff mbox series

[RFC,4/5] thermal: Add support for setting polling interval

Message ID 20200504181616.175477-5-srinivas.pandruvada@linux.intel.com (mailing list archive)
State RFC
Headers show
Series thermal: Add new mechanism to get thermal notification | expand

Commit Message

Srinivas Pandruvada May 4, 2020, 6:16 p.m. UTC
Add a new attribute in the thermal sysfs for setting the temperature sampling
interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined. The default
value is 0, which means no polling.

At this interval user space will get an event THERMAL_TEMP_SAMPLE with
a temperature sample. This reuses the existing polling mechanism that is
used when a polling or passive delay is specified during zone registration.
To avoid interference with the passive and polling delays, this new polling
attribute can't be used for those zones.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/thermal_core.c  |  7 +++++++
 drivers/thermal/thermal_sysfs.c | 36 +++++++++++++++++++++++++++++++--
 include/linux/thermal.h         |  1 +
 3 files changed, 42 insertions(+), 2 deletions(-)

Comments

Daniel Lezcano May 18, 2020, 4:51 p.m. UTC | #1
On 04/05/2020 20:16, Srinivas Pandruvada wrote:
> Add new attribute in the thermal syfs for setting temperature sampling
> interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined. The default
> value is 0, which means no polling.
> 
> At this interval user space will get an event THERMAL_TEMP_SAMPLE with
> temperature sample. This reuses existing polling mecahnism when polling
> or passive delay is specified during zone registry. To avoid interference
> with passive and polling delay, this new polling attribute can't be used
> for those zones.

The userspace can get the temperature whenever it wants via the
temperature file. The polling is designed for a specific hardware and
the slope of the temperature graph.

The userspace has the alternative of reading the temperature based on
its own timer or waiting for (and sticking to) the thermal framework
sampling rate. Adding a notification in the update is enough IMO.


> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> ---
>  drivers/thermal/thermal_core.c  |  7 +++++++
>  drivers/thermal/thermal_sysfs.c | 36 +++++++++++++++++++++++++++++++--
>  include/linux/thermal.h         |  1 +
>  3 files changed, 42 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 14770d882d42..17cd799b0073 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -313,6 +313,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
>  		thermal_zone_device_set_polling(tz, tz->passive_delay);
>  	else if (tz->polling_delay)
>  		thermal_zone_device_set_polling(tz, tz->polling_delay);
> +	else if (tz->temp_polling_delay)
> +		thermal_zone_device_set_polling(tz, tz->temp_polling_delay);
>  	else
>  		thermal_zone_device_set_polling(tz, 0);
>  
> @@ -446,6 +448,11 @@ static void update_temperature(struct thermal_zone_device *tz)
>  	tz->temperature = temp;
>  	mutex_unlock(&tz->lock);
>  
> +	if (tz->temp_polling_delay) {
> +		thermal_dev_send_event(tz->id, THERMAL_TEMP_SAMPLE, temp);
> +		monitor_thermal_zone(tz);
> +	}
> +
>  	trace_thermal_temperature(tz);
>  	if (tz->last_temperature == THERMAL_TEMP_INVALID)
>  		dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index aa85424c3ac4..0df7997993fe 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -248,6 +248,36 @@ create_thres_attr(temp_thres_low);
>  create_thres_attr(temp_thres_high);
>  create_thres_attr(temp_thres_hyst);
>  
> +static ssize_t
> +temp_polling_delay_store(struct device *dev, struct device_attribute *attr,
> +		   const char *buf, size_t count)
> +{
> +	struct thermal_zone_device *tz = to_thermal_zone(dev);
> +	int val;
> +
> +	if (kstrtoint(buf, 10, &val))
> +		return -EINVAL;
> +
> +	if (val && val < 1000)
> +		return -EINVAL;
> +
> +	tz->temp_polling_delay = val;
> +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> +
> +	return count;
> +}
> +
> +static ssize_t
> +temp_polling_delay_show(struct device *dev, struct device_attribute *attr,
> +		     char *buf)
> +{
> +	struct thermal_zone_device *tz = to_thermal_zone(dev);
> +
> +	return sprintf(buf, "%d\n", tz->temp_polling_delay);
> +}
> +
> +static DEVICE_ATTR_RW(temp_polling_delay);
> +
>  static int create_user_events_attrs(struct thermal_zone_device *tz)
>  {
>  	struct attribute **attrs;
> @@ -260,8 +290,8 @@ static int create_user_events_attrs(struct thermal_zone_device *tz)
>  	if (tz->ops->get_temp_thres_high)
>  		++index;
>  
> -	/* One additional space for NULL */
> -	attrs = kcalloc(index + 1, sizeof(*attrs), GFP_KERNEL);
> +	/* One additional space for NULL and temp_pollling_delay */
> +	attrs = kcalloc(index + 2, sizeof(*attrs), GFP_KERNEL);
>  	if (!attrs)
>  		return -ENOMEM;
>  
> @@ -312,6 +342,8 @@ static int create_user_events_attrs(struct thermal_zone_device *tz)
>  		attrs[index] = &tz->threshold_attrs[index].attr.attr;
>  		++index;
>  	}
> +	if (!tz->polling_delay && !tz->passive_delay)
> +		attrs[index++] = &dev_attr_temp_polling_delay.attr;
>  	attrs[index] = NULL;
>  	tz->threshold_attribute_group.attrs = attrs;
>  
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index ee9d79ace7ce..0ec4bd8c9c5c 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -216,6 +216,7 @@ struct thermal_zone_device {
>  	enum thermal_notify_event notify_event;
>  	struct attribute_group threshold_attribute_group;
>  	struct thermal_attr *threshold_attrs;
> +	int temp_polling_delay;
>  };
>  
>  /**
>
Srinivas Pandruvada May 18, 2020, 11:46 p.m. UTC | #2
On Mon, 2020-05-18 at 18:51 +0200, Daniel Lezcano wrote:
> On 04/05/2020 20:16, Srinivas Pandruvada wrote:
> > Add new attribute in the thermal syfs for setting temperature
> > sampling
> > interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined. The
> > default
> > value is 0, which means no polling.
> > 
> > At this interval user space will get an event THERMAL_TEMP_SAMPLE
> > with
> > temperature sample. This reuses existing polling mecahnism when
> > polling
> > or passive delay is specified during zone registry. To avoid
> > interference
> > with passive and polling delay, this new polling attribute can't be
> > used
> > for those zones.
> 
> The userspace can get the temperature whenever it wants via the
> temperature file. The polling is designed for a specific hardware and
> the slope of the temperature graphic.
> 
> The userspace has the alternative of reading the temperature based on
> its own timer or wait for (and stick to) the thermal framework
> sampling
> rate. Adding a notification in the update is enough IMO.
> 
The problem with this approach is that the user can't change the sampling
interval. Those polling intervals are fixed during thermal-zone
registration. Is there any way to change those defaults from user space?

The kernel can start with some long polling interval and user space can
change it when the temperature is close to some trip.

Thanks,
Srinivas




> 
> > Signed-off-by: Srinivas Pandruvada <
> > srinivas.pandruvada@linux.intel.com>
> > ---
> >  drivers/thermal/thermal_core.c  |  7 +++++++
> >  drivers/thermal/thermal_sysfs.c | 36
> > +++++++++++++++++++++++++++++++--
> >  include/linux/thermal.h         |  1 +
> >  3 files changed, 42 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c
> > index 14770d882d42..17cd799b0073 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -313,6 +313,8 @@ static void monitor_thermal_zone(struct
> > thermal_zone_device *tz)
> >  		thermal_zone_device_set_polling(tz, tz->passive_delay);
> >  	else if (tz->polling_delay)
> >  		thermal_zone_device_set_polling(tz, tz->polling_delay);
> > +	else if (tz->temp_polling_delay)
> > +		thermal_zone_device_set_polling(tz, tz-
> > >temp_polling_delay);
> >  	else
> >  		thermal_zone_device_set_polling(tz, 0);
> >  
> > @@ -446,6 +448,11 @@ static void update_temperature(struct
> > thermal_zone_device *tz)
> >  	tz->temperature = temp;
> >  	mutex_unlock(&tz->lock);
> >  
> > +	if (tz->temp_polling_delay) {
> > +		thermal_dev_send_event(tz->id, THERMAL_TEMP_SAMPLE,
> > temp);
> > +		monitor_thermal_zone(tz);
> > +	}
> > +
> >  	trace_thermal_temperature(tz);
> >  	if (tz->last_temperature == THERMAL_TEMP_INVALID)
> >  		dev_dbg(&tz->device, "last_temperature N/A,
> > current_temperature=%d\n",
> > diff --git a/drivers/thermal/thermal_sysfs.c
> > b/drivers/thermal/thermal_sysfs.c
> > index aa85424c3ac4..0df7997993fe 100644
> > --- a/drivers/thermal/thermal_sysfs.c
> > +++ b/drivers/thermal/thermal_sysfs.c
> > @@ -248,6 +248,36 @@ create_thres_attr(temp_thres_low);
> >  create_thres_attr(temp_thres_high);
> >  create_thres_attr(temp_thres_hyst);
> >  
> > +static ssize_t
> > +temp_polling_delay_store(struct device *dev, struct
> > device_attribute *attr,
> > +		   const char *buf, size_t count)
> > +{
> > +	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > +	int val;
> > +
> > +	if (kstrtoint(buf, 10, &val))
> > +		return -EINVAL;
> > +
> > +	if (val && val < 1000)
> > +		return -EINVAL;
> > +
> > +	tz->temp_polling_delay = val;
> > +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> > +
> > +	return count;
> > +}
> > +
> > +static ssize_t
> > +temp_polling_delay_show(struct device *dev, struct
> > device_attribute *attr,
> > +		     char *buf)
> > +{
> > +	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > +
> > +	return sprintf(buf, "%d\n", tz->temp_polling_delay);
> > +}
> > +
> > +static DEVICE_ATTR_RW(temp_polling_delay);
> > +
> >  static int create_user_events_attrs(struct thermal_zone_device
> > *tz)
> >  {
> >  	struct attribute **attrs;
> > @@ -260,8 +290,8 @@ static int create_user_events_attrs(struct
> > thermal_zone_device *tz)
> >  	if (tz->ops->get_temp_thres_high)
> >  		++index;
> >  
> > -	/* One additional space for NULL */
> > -	attrs = kcalloc(index + 1, sizeof(*attrs), GFP_KERNEL);
> > +	/* One additional space for NULL and temp_pollling_delay */
> > +	attrs = kcalloc(index + 2, sizeof(*attrs), GFP_KERNEL);
> >  	if (!attrs)
> >  		return -ENOMEM;
> >  
> > @@ -312,6 +342,8 @@ static int create_user_events_attrs(struct
> > thermal_zone_device *tz)
> >  		attrs[index] = &tz->threshold_attrs[index].attr.attr;
> >  		++index;
> >  	}
> > +	if (!tz->polling_delay && !tz->passive_delay)
> > +		attrs[index++] = &dev_attr_temp_polling_delay.attr;
> >  	attrs[index] = NULL;
> >  	tz->threshold_attribute_group.attrs = attrs;
> >  
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > index ee9d79ace7ce..0ec4bd8c9c5c 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -216,6 +216,7 @@ struct thermal_zone_device {
> >  	enum thermal_notify_event notify_event;
> >  	struct attribute_group threshold_attribute_group;
> >  	struct thermal_attr *threshold_attrs;
> > +	int temp_polling_delay;
> >  };
> >  
> >  /**
> > 
> 
>
Daniel Lezcano May 19, 2020, 10:25 a.m. UTC | #3
On 19/05/2020 01:46, Srinivas Pandruvada wrote:
> On Mon, 2020-05-18 at 18:51 +0200, Daniel Lezcano wrote:
>> On 04/05/2020 20:16, Srinivas Pandruvada wrote:
>>> Add new attribute in the thermal syfs for setting temperature
>>> sampling
>>> interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined. The
>>> default
>>> value is 0, which means no polling.
>>>
>>> At this interval user space will get an event THERMAL_TEMP_SAMPLE
>>> with
>>> temperature sample. This reuses existing polling mecahnism when
>>> polling
>>> or passive delay is specified during zone registry. To avoid
>>> interference
>>> with passive and polling delay, this new polling attribute can't be
>>> used
>>> for those zones.
>>
>> The userspace can get the temperature whenever it wants via the
>> temperature file. The polling is designed for a specific hardware and
>> the slope of the temperature graphic.
>>
>> The userspace has the alternative of reading the temperature based on
>> its own timer or wait for (and stick to) the thermal framework
>> sampling
>> rate. Adding a notification in the update is enough IMO.
>>
> The problem with this approach is that the user can't change sampling
> interval. Those polling intervals are fixed during thermal-zone
> register. Is there any way to change those defaults from user space?

No, we can't but the userspace can decide when to read the temperature
(via sysfs or netlink) and thus decide its own sampling rate.

Otherwise, we are talking about a userspace governor, so the platform
is set up with the desired sampling rate + userspace governor.

> Kernel can start with some long polling interval and user space can
> change close to some trip.

Ok, let me rephrase it. This (big) comment also encompasses patch 3/5.

I understood now the initial need of adding user trip points.

There are platforms where the interrupt mode does not exist, so setting
a user trip point does not set the interrupt for the closer
temperature; hence we end up with a kernel sampling rate, and in this
case adding a trip point + new user sampling rate is pointless as the
userspace can poll the temperature at its convenient rate.

If we summarize the different combinations we have:

1. monitoring : interrupt mode, mitigation : interrupt mode

There is no thermal zone update until an interrupt fires. The
mitigation is based on the trip point crossed.

2. monitoring : interrupt mode, mitigation : polling

There is no thermal zone update until an interrupt fires. The
mitigation happens with a sampling rate specified with the polling rate.

3. monitoring : polling, mitigation : polling

The thermal zone is updated at the polling rate, the mitigation occurs
with an update at the second polling rate.

IIUC, the RFC proposes to add a new type of temperature threshold,
followed by a new polling rate to update the userspace.

IMHO, it is not a good thing to delegate to the kernel what the
userspace can handle easily.

I suggest:

 - Do not add another polling rate. If the thermal zone has a polling rate
or supports the interrupt mode, then the user trip point setup succeeds;
otherwise it fails and it is up to the userspace to read the temperature at
its convenient rate. (Note that multiple processes may want to get the
temperature, so one should not set the rate of others).

 - Not add another temp threshold structure but add a new trip type
"user" and keep using the existing trip structures, so the notification
can happen in the handle_trip_point function. The sysfs only reflects
the setup via the "trip_point_x_hyst", "trip_point_0_temp",
"trip_point_x_type"

 - Do not use sysfs for setup but rely on genetlink for a one-message
setup instead of writing multiple sysfs files. Adding a trip point will
be straightforward.


What do you think?
Amit Kucheria May 20, 2020, 4:38 a.m. UTC | #4
On Mon, May 4, 2020 at 11:47 PM Srinivas Pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> Add new attribute in the thermal syfs for setting temperature sampling
> interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined. The default
> value is 0, which means no polling.
>
> At this interval user space will get an event THERMAL_TEMP_SAMPLE with
> temperature sample. This reuses existing polling mecahnism when polling
> or passive delay is specified during zone registry. To avoid interference
> with passive and polling delay, this new polling attribute can't be used
> for those zones.

Why should the kernel periodically emit events for userspace when the
userspace is perfectly capable of deciding how frequently it wants to
poll a file for changes?

>
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> ---
>  drivers/thermal/thermal_core.c  |  7 +++++++
>  drivers/thermal/thermal_sysfs.c | 36 +++++++++++++++++++++++++++++++--
>  include/linux/thermal.h         |  1 +
>  3 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 14770d882d42..17cd799b0073 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -313,6 +313,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
>                 thermal_zone_device_set_polling(tz, tz->passive_delay);
>         else if (tz->polling_delay)
>                 thermal_zone_device_set_polling(tz, tz->polling_delay);
> +       else if (tz->temp_polling_delay)
> +               thermal_zone_device_set_polling(tz, tz->temp_polling_delay);
>         else
>                 thermal_zone_device_set_polling(tz, 0);
>
> @@ -446,6 +448,11 @@ static void update_temperature(struct thermal_zone_device *tz)
>         tz->temperature = temp;
>         mutex_unlock(&tz->lock);
>
> +       if (tz->temp_polling_delay) {
> +               thermal_dev_send_event(tz->id, THERMAL_TEMP_SAMPLE, temp);
> +               monitor_thermal_zone(tz);
> +       }
> +
>         trace_thermal_temperature(tz);
>         if (tz->last_temperature == THERMAL_TEMP_INVALID)
>                 dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index aa85424c3ac4..0df7997993fe 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -248,6 +248,36 @@ create_thres_attr(temp_thres_low);
>  create_thres_attr(temp_thres_high);
>  create_thres_attr(temp_thres_hyst);
>
> +static ssize_t
> +temp_polling_delay_store(struct device *dev, struct device_attribute *attr,
> +                  const char *buf, size_t count)
> +{
> +       struct thermal_zone_device *tz = to_thermal_zone(dev);
> +       int val;
> +
> +       if (kstrtoint(buf, 10, &val))
> +               return -EINVAL;
> +
> +       if (val && val < 1000)
> +               return -EINVAL;
> +
> +       tz->temp_polling_delay = val;
> +       thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> +
> +       return count;
> +}
> +
> +static ssize_t
> +temp_polling_delay_show(struct device *dev, struct device_attribute *attr,
> +                    char *buf)
> +{
> +       struct thermal_zone_device *tz = to_thermal_zone(dev);
> +
> +       return sprintf(buf, "%d\n", tz->temp_polling_delay);
> +}
> +
> +static DEVICE_ATTR_RW(temp_polling_delay);
> +
>  static int create_user_events_attrs(struct thermal_zone_device *tz)
>  {
>         struct attribute **attrs;
> @@ -260,8 +290,8 @@ static int create_user_events_attrs(struct thermal_zone_device *tz)
>         if (tz->ops->get_temp_thres_high)
>                 ++index;
>
> -       /* One additional space for NULL */
> -       attrs = kcalloc(index + 1, sizeof(*attrs), GFP_KERNEL);
> +       /* One additional space for NULL and temp_pollling_delay */
> +       attrs = kcalloc(index + 2, sizeof(*attrs), GFP_KERNEL);
>         if (!attrs)
>                 return -ENOMEM;
>
> @@ -312,6 +342,8 @@ static int create_user_events_attrs(struct thermal_zone_device *tz)
>                 attrs[index] = &tz->threshold_attrs[index].attr.attr;
>                 ++index;
>         }
> +       if (!tz->polling_delay && !tz->passive_delay)
> +               attrs[index++] = &dev_attr_temp_polling_delay.attr;
>         attrs[index] = NULL;
>         tz->threshold_attribute_group.attrs = attrs;
>
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index ee9d79ace7ce..0ec4bd8c9c5c 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -216,6 +216,7 @@ struct thermal_zone_device {
>         enum thermal_notify_event notify_event;
>         struct attribute_group threshold_attribute_group;
>         struct thermal_attr *threshold_attrs;
> +       int temp_polling_delay;
>  };
>
>  /**
> --
> 2.25.4
>
Srinivas Pandruvada May 21, 2020, 10:26 p.m. UTC | #5
On Tue, 2020-05-19 at 12:25 +0200, Daniel Lezcano wrote:
> On 19/05/2020 01:46, Srinivas Pandruvada wrote:
> > On Mon, 2020-05-18 at 18:51 +0200, Daniel Lezcano wrote:
> > > On 04/05/2020 20:16, Srinivas Pandruvada wrote:
> > > > Add new attribute in the thermal syfs for setting temperature
> > > > sampling
> > > > interval when CONFIG_THERMAL_USER_EVENT_INTERFACE is defined.
> > > > The
> > > > default
> > > > value is 0, which means no polling.
> > > > 
> > > > At this interval user space will get an event
> > > > THERMAL_TEMP_SAMPLE
> > > > with
> > > > temperature sample. This reuses existing polling mecahnism when
> > > > polling
> > > > or passive delay is specified during zone registry. To avoid
> > > > interference
> > > > with passive and polling delay, this new polling attribute
> > > > can't be
> > > > used
> > > > for those zones.
> > > 
> > > The userspace can get the temperature whenever it wants via the
> > > temperature file. The polling is designed for a specific hardware
> > > and
> > > the slope of the temperature graphic.
> > > 
> > > The userspace has the alternative of reading the temperature
> > > based on
> > > its own timer or wait for (and stick to) the thermal framework
> > > sampling
> > > rate. Adding a notification in the update is enough IMO.
> > > 
> > The problem with this approach is that the user can't change
> > sampling
> > interval. Those polling intervals are fixed during thermal-zone
> > register. Is there any way to change those defaults from user
> > space?
> 
> No, we can't but the userspace can decide when to read the
> temperature
> (via sysfs or netlink) and thus decide its own sampling rate.

Yes, if we poll for temperature from user space, no change is required,
neither netlink nor kfifo.

The average time to read CPU temperature and convert to int from sysfs
takes 45us vs 7us for push via Kfifo. I haven't looked at your patches
and checked this time. If it is comparable then netlink is better.


> 
> Otherwise, we are talking about an userspace governor, so the
> platform
> is setup with the desired sampling rate + userspace governor.
> 
> > Kernel can start with some long polling interval and user space can
> > change close to some trip.
> 
> Ok, let me rephrase it. This (big) comment encompass also patch 3/5.
> 
> I understood now the initial need of adding user trip points.
> 
> There are platforms where the interrupt mode does not exist so
> setting
> an user trip point does not set the interrupt for the closer
> temperature, hence we end up with a kernel sampling rate and in this
> case adding a trip point + new user sampling rate is pointless as the
> userspace can poll the temperature at its convenient rate.
> 
> If we summarize the different combinations we have:
> 
> 1. monitoring : interrupt mode, mitigation : interrupt mode
> 
> There are no thermal zone update until an interrupt fires. The
> mitigation is based on trip point crossed.

Yes. Basically daemon sleeps, till it gets a netlink notification
currently.

> 
> 2. monitoring : interrupt mode, mitigation : polling
> 
> There are no thermal zone update until an interrupt fires. The
> mitigation happens with a sampling rate specified with the polling
> rate.
> 
More complex than this. The interrupt fires, but it will fire a flood of
events for any change (+/- around the threshold), so after the first
trigger we just disable the interrupt for an interval and start polling
from user space, as this is more efficient.

> 3. monitoring : polling, mitigation : polling
> 
> The thermal zone is updated at the polling rate, the mitigation
> occurs
> with an update at the second polling rate.
> 
Yes.

> IIUC, the RFC proposes to add a new type of temperature threshold,
> followed a new polling rate to update the userspace.
> 
> IMHO, it is not a good thing to delegate to the kernel what the
> userspace can handle easily.
> 
> I suggest:
> 
>  - Not add another polling rate. If the thermal zone has a polling
> rate
> or supports the interrupt mode, then the user trip point setup
> succeed
> otherwise it fails and up to the userspace to read the temperature at
> its convenient rate. (Note multiple process may want to get
> temperature,
> so one should not set the rate of others).

Fine.

> 
>  - Not add another temp threshold structure but add a new trip type
> "user" and keep using the existing trip structures, so the
> notification
> can happen in the handle_trip_point function. The sysfs only reflects
> the setup via the "trip_point_x_hyst", "trip_point_0_temp",
> "trip_point_x_type"

Fine. It is better than re-purposing a passive trip, as we do currently.

> 
>  - Do not use sysfs for setup but rely on the genetlink for one
> message
> setup instead of multiple sysfs file writing. Adding a trip point
> will

Fine.

> 
> 
> What do you think?
Looks good.

Thanks,
Srinivas

> 
>
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 14770d882d42..17cd799b0073 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -313,6 +313,8 @@  static void monitor_thermal_zone(struct thermal_zone_device *tz)
 		thermal_zone_device_set_polling(tz, tz->passive_delay);
 	else if (tz->polling_delay)
 		thermal_zone_device_set_polling(tz, tz->polling_delay);
+	else if (tz->temp_polling_delay)
+		thermal_zone_device_set_polling(tz, tz->temp_polling_delay);
 	else
 		thermal_zone_device_set_polling(tz, 0);
 
@@ -446,6 +448,11 @@  static void update_temperature(struct thermal_zone_device *tz)
 	tz->temperature = temp;
 	mutex_unlock(&tz->lock);
 
+	if (tz->temp_polling_delay) {
+		thermal_dev_send_event(tz->id, THERMAL_TEMP_SAMPLE, temp);
+		monitor_thermal_zone(tz);
+	}
+
 	trace_thermal_temperature(tz);
 	if (tz->last_temperature == THERMAL_TEMP_INVALID)
 		dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index aa85424c3ac4..0df7997993fe 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -248,6 +248,36 @@  create_thres_attr(temp_thres_low);
 create_thres_attr(temp_thres_high);
 create_thres_attr(temp_thres_hyst);
 
+static ssize_t
+temp_polling_delay_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int val;
+
+	if (kstrtoint(buf, 10, &val))
+		return -EINVAL;
+
+	if (val && val < 1000)
+		return -EINVAL;
+
+	tz->temp_polling_delay = val;
+	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+
+	return count;
+}
+
+static ssize_t
+temp_polling_delay_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	return sprintf(buf, "%d\n", tz->temp_polling_delay);
+}
+
+static DEVICE_ATTR_RW(temp_polling_delay);
+
 static int create_user_events_attrs(struct thermal_zone_device *tz)
 {
 	struct attribute **attrs;
@@ -260,8 +290,8 @@  static int create_user_events_attrs(struct thermal_zone_device *tz)
 	if (tz->ops->get_temp_thres_high)
 		++index;
 
-	/* One additional space for NULL */
-	attrs = kcalloc(index + 1, sizeof(*attrs), GFP_KERNEL);
+	/* One additional space for NULL and temp_pollling_delay */
+	attrs = kcalloc(index + 2, sizeof(*attrs), GFP_KERNEL);
 	if (!attrs)
 		return -ENOMEM;
 
@@ -312,6 +342,8 @@  static int create_user_events_attrs(struct thermal_zone_device *tz)
 		attrs[index] = &tz->threshold_attrs[index].attr.attr;
 		++index;
 	}
+	if (!tz->polling_delay && !tz->passive_delay)
+		attrs[index++] = &dev_attr_temp_polling_delay.attr;
 	attrs[index] = NULL;
 	tz->threshold_attribute_group.attrs = attrs;
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index ee9d79ace7ce..0ec4bd8c9c5c 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -216,6 +216,7 @@  struct thermal_zone_device {
 	enum thermal_notify_event notify_event;
 	struct attribute_group threshold_attribute_group;
 	struct thermal_attr *threshold_attrs;
+	int temp_polling_delay;
 };
 
 /**