diff mbox series

[3/3] drm/i915/hwmon: Expose power1_max_enable

Message ID 20230214053342.1952226-4-ashutosh.dixit@intel.com (mailing list archive)
State Handled Elsewhere
Headers show
Series PL1 power limit fixes for ATSM | expand

Commit Message

Dixit, Ashutosh Feb. 14, 2023, 5:33 a.m. UTC
On ATSM the PL1 power limit is disabled at power up. The previous uapi
assumed that the PL1 limit is always enabled and therefore did not have a
notion of a disabled PL1 limit. This results in erroneous PL1 limit values
when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
limit is shown as 0 which means a low PL1 limit whereas the limit being
disabled actually implies a high effective PL1 limit value.

To get round this problem, expose power1_max_enable as a custom hwmon
attribute. power1_max_enable can be used in conjunction with power1_max to
interpret power1_max (PL1 limit) values correctly. It can also be used to
enable/disable the PL1 power limit.

Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
 drivers/gpu/drm/i915/i915_hwmon.c             | 48 +++++++++++++++++--
 2 files changed, 51 insertions(+), 4 deletions(-)

Comments

Guenter Roeck Feb. 14, 2023, 6:16 a.m. UTC | #1
On 2/13/23 21:33, Ashutosh Dixit wrote:
> On ATSM the PL1 power limit is disabled at power up. The previous uapi
> assumed that the PL1 limit is always enabled and therefore did not have a
> notion of a disabled PL1 limit. This results in erroneous PL1 limit values
> when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
> limit is shown as 0 which means a low PL1 limit whereas the limit being
> disabled actually implies a high effective PL1 limit value.
> 
> To get round this problem, expose power1_max_enable as a custom hwmon
> attribute. power1_max_enable can be used in conjunction with power1_max to
> interpret power1_max (PL1 limit) values correctly. It can also be used to
> enable/disable the PL1 power limit.
> 
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>   .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
>   drivers/gpu/drm/i915/i915_hwmon.c             | 48 +++++++++++++++++--
>   2 files changed, 51 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> index 2d6a472eef885..edd94a44b4570 100644
> --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> @@ -18,6 +18,13 @@ Description:	RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
>   
>   		Only supported for particular Intel i915 graphics platforms.
>   
> +What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_enable

This is not a standard hwmon attribute. The standard attribute would be power1_enable.

So from hwmon perspective this is a NACK.

Guenter

> +Date:		May 2023
> +KernelVersion:	6.3
> +Contact:	intel-gfx@lists.freedesktop.org
> +Description:	RW. Enable/disable the PL1 power limit (power1_max).
> +
> +		Only supported for particular Intel i915 graphics platforms.
>   What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
>   Date:		February 2023
>   KernelVersion:	6.2
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> index 7c20a6f47b92e..5665869d8602b 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -230,13 +230,52 @@ hwm_power1_max_interval_store(struct device *dev,
>   					    PKG_PWR_LIM_1_TIME, rxy);
>   	return count;
>   }
> +static SENSOR_DEVICE_ATTR_RW(power1_max_interval, hwm_power1_max_interval, 0);
>   
> -static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> -			  hwm_power1_max_interval_show,
> -			  hwm_power1_max_interval_store, 0);
> +static ssize_t
> +hwm_power1_max_enable_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> +	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> +	intel_wakeref_t wakeref;
> +	u32 r;
> +
> +	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> +		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
> +
> +	return sysfs_emit(buf, "%u\n", !!(r & PKG_PWR_LIM_1_EN));
> +}
> +
> +static ssize_t
> +hwm_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
> +			    const char *buf, size_t count)
> +{
> +	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> +	intel_wakeref_t wakeref;
> +	u32 en, r;
> +	bool _en;
> +	int ret;
> +
> +	ret = kstrtobool(buf, &_en);
> +	if (ret)
> +		return ret;
> +
> +	en = REG_FIELD_PREP(PKG_PWR_LIM_1_EN, _en);
> +	hwm_locked_with_pm_intel_uncore_rmw(ddat, ddat->hwmon->rg.pkg_rapl_limit,
> +					    PKG_PWR_LIM_1_EN, en);
> +
> +	/* Verify, because PL1 limit cannot be disabled on all platforms */
> +	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> +		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
> +	if ((r & PKG_PWR_LIM_1_EN) != en)
> +		return -EPERM;
> +
> +	return count;
> +}
> +static SENSOR_DEVICE_ATTR_RW(power1_max_enable, hwm_power1_max_enable, 0);
>   
>   static struct attribute *hwm_attributes[] = {
>   	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
> +	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
>   	NULL
>   };
>   
> @@ -247,7 +286,8 @@ static umode_t hwm_attributes_visible(struct kobject *kobj,
>   	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
>   	struct i915_hwmon *hwmon = ddat->hwmon;
>   
> -	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
> +	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr ||
> +	    attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
>   		return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
>   
>   	return 0;
Dixit, Ashutosh Feb. 15, 2023, 3:11 a.m. UTC | #2
On Mon, 13 Feb 2023 22:16:44 -0800, Guenter Roeck wrote:
>

Hi Guenter,

> On 2/13/23 21:33, Ashutosh Dixit wrote:
> > On ATSM the PL1 power limit is disabled at power up. The previous uapi
> > assumed that the PL1 limit is always enabled and therefore did not have a
> > notion of a disabled PL1 limit. This results in erroneous PL1 limit values
> > when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
> > limit is shown as 0 which means a low PL1 limit whereas the limit being
> > disabled actually implies a high effective PL1 limit value.
> >
> > To get round this problem, expose power1_max_enable as a custom hwmon
> > attribute. power1_max_enable can be used in conjunction with power1_max to
> > interpret power1_max (PL1 limit) values correctly. It can also be used to
> > enable/disable the PL1 power limit.
> >
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> >   .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
> >   drivers/gpu/drm/i915/i915_hwmon.c             | 48 +++++++++++++++++--
> >   2 files changed, 51 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > index 2d6a472eef885..edd94a44b4570 100644
> > --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > @@ -18,6 +18,13 @@ Description:	RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
> >			Only supported for particular Intel i915 graphics
> > platforms.
> >   +What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_enable
>
> This is not a standard hwmon attribute. The standard attribute would be
> power1_enable.
>
> So from hwmon perspective this is a NACK.

Thanks for the feedback. I did consider power1_enable but decided to go
with the power1_max_enable custom attribute. Documentation for
power1_enable says it is to "Enable or disable the sensors" but in our case
we are not enabling/disabling sensors (which we don't have any ability to,
neither do we expose any power measurements, only energy from which power
can be derived) but enabling/disabling a "power limit" (a limit beyond
which HW takes steps to limit power).

As mentioned in the commit message, power1_max_enable is exposed to avoid
possible misinterpretations in measured energy in response to the set power
limit (something specific to our HW). We may have multiple such limits in
the future with similar issues and multiplexing enabling/disabling these
power limits via a single power1_enable file will not provide sufficient
granularity for our purposes.

Also, I had previously posted this patch:

https://patchwork.freedesktop.org/patch/522612/?series=113972&rev=1

which avoids the power1_max_enable file and instead uses a power1_max value
of -1 to indicate that the power1_max limit is disabled.

So in summary we have the following options:

1. Go with power1_max_enable (preferred, works well for us)
2. Go with -1 to indicate that the power1_max limit is disabled
   (non-intuitive and also a little ugly)
3. Go with power1_enable (possible but confusing because multiple power
   limits/entities are multiplexed via a single file)

If you still think we should not use power1_max_enable I think I might drop
this patch for now (I am trying to preempt future issues but in this case
it's better to wait till people actually complain rather than expose a
non-ideal uapi).

Even if we drop this patch now, it would be good to know your preference in
case we need to revisit this issue later.

Thanks and also sorry for the rather long-winded email.

Ashutosh

> Guenter
>
> > +Date:		May 2023
> > +KernelVersion:	6.3
> > +Contact:	intel-gfx@lists.freedesktop.org
> > +Description:	RW. Enable/disable the PL1 power limit (power1_max).
> > +
> > +		Only supported for particular Intel i915 graphics platforms.
> >   What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
> >   Date:		February 2023
> >   KernelVersion:	6.2
> > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > index 7c20a6f47b92e..5665869d8602b 100644
> > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > @@ -230,13 +230,52 @@ hwm_power1_max_interval_store(struct device *dev,
> >					    PKG_PWR_LIM_1_TIME, rxy);
> >	return count;
> >   }
> > +static SENSOR_DEVICE_ATTR_RW(power1_max_interval, hwm_power1_max_interval, 0);
> >   -static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> > -			  hwm_power1_max_interval_show,
> > -			  hwm_power1_max_interval_store, 0);
> > +static ssize_t
> > +hwm_power1_max_enable_show(struct device *dev, struct device_attribute *attr, char *buf)
> > +{
> > +	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > +	intel_wakeref_t wakeref;
> > +	u32 r;
> > +
> > +	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> > +		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
> > +
> > +	return sysfs_emit(buf, "%u\n", !!(r & PKG_PWR_LIM_1_EN));
> > +}
> > +
> > +static ssize_t
> > +hwm_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
> > +			    const char *buf, size_t count)
> > +{
> > +	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > +	intel_wakeref_t wakeref;
> > +	u32 en, r;
> > +	bool _en;
> > +	int ret;
> > +
> > +	ret = kstrtobool(buf, &_en);
> > +	if (ret)
> > +		return ret;
> > +
> > +	en = REG_FIELD_PREP(PKG_PWR_LIM_1_EN, _en);
> > +	hwm_locked_with_pm_intel_uncore_rmw(ddat, ddat->hwmon->rg.pkg_rapl_limit,
> > +					    PKG_PWR_LIM_1_EN, en);
> > +
> > +	/* Verify, because PL1 limit cannot be disabled on all platforms */
> > +	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> > +		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
> > +	if ((r & PKG_PWR_LIM_1_EN) != en)
> > +		return -EPERM;
> > +
> > +	return count;
> > +}
> > +static SENSOR_DEVICE_ATTR_RW(power1_max_enable, hwm_power1_max_enable, 0);
> >     static struct attribute *hwm_attributes[] = {
> >	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
> > +	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
> >	NULL
> >   };
> >   @@ -247,7 +286,8 @@ static umode_t hwm_attributes_visible(struct
> > kobject *kobj,
> >	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> >	struct i915_hwmon *hwmon = ddat->hwmon;
> >   -	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
> > +	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr ||
> > +	    attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
> >		return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
> >		return 0;
>
Rodrigo Vivi Feb. 16, 2023, 6:57 p.m. UTC | #3
On Tue, Feb 14, 2023 at 07:11:16PM -0800, Dixit, Ashutosh wrote:
> On Mon, 13 Feb 2023 22:16:44 -0800, Guenter Roeck wrote:
> >
> 
> Hi Guenter,
> 
> > On 2/13/23 21:33, Ashutosh Dixit wrote:
> > > On ATSM the PL1 power limit is disabled at power up. The previous uapi
> > > assumed that the PL1 limit is always enabled and therefore did not have a
> > > notion of a disabled PL1 limit. This results in erroneous PL1 limit values
> > > when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
> > > limit is shown as 0 which means a low PL1 limit whereas the limit being
> > > disabled actually implies a high effective PL1 limit value.
> > >
> > > To get round this problem, expose power1_max_enable as a custom hwmon
> > > attribute. power1_max_enable can be used in conjunction with power1_max to
> > > interpret power1_max (PL1 limit) values correctly. It can also be used to
> > > enable/disable the PL1 power limit.
> > >
> > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > ---
> > >   .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
> > >   drivers/gpu/drm/i915/i915_hwmon.c             | 48 +++++++++++++++++--
> > >   2 files changed, 51 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > > index 2d6a472eef885..edd94a44b4570 100644
> > > --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > > +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > > @@ -18,6 +18,13 @@ Description:	RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
> > >			Only supported for particular Intel i915 graphics
> > > platforms.
> > >   +What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_enable
> >
> > This is not a standard hwmon attribute. The standard attribute would be
> > power1_enable.
> >
> > So from hwmon perspective this is a NACK.
> 
> Thanks for the feedback. I did consider power1_enable but decided to go
> with the power1_max_enable custom attribute. Documentation for
> power1_enable says it is to "Enable or disable the sensors" but in our case
> we are not enabling/disabling sensors (which we don't have any ability to,
> neither do we expose any power measurements, only energy from which power
> can be derived) but enabling/disabling a "power limit" (a limit beyond
> which HW takes steps to limit power).

Hi Guenter,

are you okay with this explanation to release the previous 'nack'?

For me it looks like this case really doesn't fit in the standard ones.

But also this made me wonder what are the rules for non-standard cases?

I couldn't find any clear guidelines in here:
https://docs.kernel.org/hwmon/hwmon-kernel-api.html#driver-provided-sysfs-attributes

We are seeing drivers around that freely use non-standard hwmon attributes.
Are we free to add non-standard ones as long as the case doesn't fit in the
defined standards, or should we really limit their use and do our own thing
on our own?

I mean, for the new Xe driver I was considering standardizing everything
related to freq and power on top of hwmon instead of separate sysfs
files. But this would mean a lot of non-standard attributes on top of a few
standard hwmon ones. But I will hold off on this plan if you tell me that we
should avoid and limit the non-standard cases.

> 
> As mentioned in the commit message, power1_max_enable is exposed to avoid
> possible misinterpretations in measured energy in response to the set power
> limit (something specific to our HW). We may have multiple such limits in
> the future with similar issues and multiplexing enabling/disabling these
> power limits via a single power1_enable file will not provide sufficient
> granularity for our purposes.
> 
> Also, I had previously posted this patch:
> 
> https://patchwork.freedesktop.org/patch/522612/?series=113972&rev=1
> 
> which avoids the power1_max_enable file and instead uses a power1_max value
> of -1 to indicate that the power1_max limit is disabled.
> 
> So in summary we have the following options:
> 
> 1. Go with power1_max_enable (preferred, works well for us)
> 2. Go with -1 to indicate that the power1_max limit is disabled
>    (non-intuitive and also a little ugly)
> 3. Go with power1_enable (possible but confusing because multiple power
>    limits/entities are multiplexed via a single file)
> 
> If you still think we should not use power1_max_enable I think I might drop
> this patch for now (I am trying to preempt future issues but in this case
> it's better to wait till people actually complain rather than expose a
> non-ideal uapi).
> 
> Even if drop we this patch now, it would be good to know your preference in
> case we need to revisit this issue later.
> 
> Thanks and also sorry for the rather long winded email.
> 
> Ashutosh
> 
> > Guenter
> >
> > > +Date:		May 2023
> > > +KernelVersion:	6.3
> > > +Contact:	intel-gfx@lists.freedesktop.org
> > > +Description:	RW. Enable/disable the PL1 power limit (power1_max).
> > > +
> > > +		Only supported for particular Intel i915 graphics platforms.
> > >   What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
> > >   Date:		February 2023
> > >   KernelVersion:	6.2
> > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > > index 7c20a6f47b92e..5665869d8602b 100644
> > > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > > @@ -230,13 +230,52 @@ hwm_power1_max_interval_store(struct device *dev,
> > >					    PKG_PWR_LIM_1_TIME, rxy);
> > >	return count;
> > >   }
> > > +static SENSOR_DEVICE_ATTR_RW(power1_max_interval, hwm_power1_max_interval, 0);
> > >   -static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> > > -			  hwm_power1_max_interval_show,
> > > -			  hwm_power1_max_interval_store, 0);
> > > +static ssize_t
> > > +hwm_power1_max_enable_show(struct device *dev, struct device_attribute *attr, char *buf)
> > > +{
> > > +	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > > +	intel_wakeref_t wakeref;
> > > +	u32 r;
> > > +
> > > +	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> > > +		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
> > > +
> > > +	return sysfs_emit(buf, "%u\n", !!(r & PKG_PWR_LIM_1_EN));
> > > +}
> > > +
> > > +static ssize_t
> > > +hwm_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
> > > +			    const char *buf, size_t count)
> > > +{
> > > +	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > > +	intel_wakeref_t wakeref;
> > > +	u32 en, r;
> > > +	bool _en;
> > > +	int ret;
> > > +
> > > +	ret = kstrtobool(buf, &_en);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	en = REG_FIELD_PREP(PKG_PWR_LIM_1_EN, _en);
> > > +	hwm_locked_with_pm_intel_uncore_rmw(ddat, ddat->hwmon->rg.pkg_rapl_limit,
> > > +					    PKG_PWR_LIM_1_EN, en);
> > > +
> > > +	/* Verify, because PL1 limit cannot be disabled on all platforms */
> > > +	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> > > +		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
> > > +	if ((r & PKG_PWR_LIM_1_EN) != en)
> > > +		return -EPERM;
> > > +
> > > +	return count;
> > > +}
> > > +static SENSOR_DEVICE_ATTR_RW(power1_max_enable, hwm_power1_max_enable, 0);
> > >     static struct attribute *hwm_attributes[] = {
> > >	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
> > > +	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
> > >	NULL
> > >   };
> > >   @@ -247,7 +286,8 @@ static umode_t hwm_attributes_visible(struct
> > > kobject *kobj,
> > >	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > >	struct i915_hwmon *hwmon = ddat->hwmon;
> > >   -	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
> > > +	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr ||
> > > +	    attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
> > >		return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
> > >		return 0;
> >
Guenter Roeck Feb. 16, 2023, 7:25 p.m. UTC | #4
On 2/16/23 10:57, Rodrigo Vivi wrote:
> On Tue, Feb 14, 2023 at 07:11:16PM -0800, Dixit, Ashutosh wrote:
>> On Mon, 13 Feb 2023 22:16:44 -0800, Guenter Roeck wrote:
>>>
>>
>> Hi Guenter,
>>
>>> On 2/13/23 21:33, Ashutosh Dixit wrote:
>>>> On ATSM the PL1 power limit is disabled at power up. The previous uapi
>>>> assumed that the PL1 limit is always enabled and therefore did not have a
>>>> notion of a disabled PL1 limit. This results in erroneous PL1 limit values
>>>> when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
>>>> limit is shown as 0 which means a low PL1 limit whereas the limit being
>>>> disabled actually implies a high effective PL1 limit value.
>>>>
>>>> To get round this problem, expose power1_max_enable as a custom hwmon
>>>> attribute. power1_max_enable can be used in conjunction with power1_max to
>>>> interpret power1_max (PL1 limit) values correctly. It can also be used to
>>>> enable/disable the PL1 power limit.
>>>>
>>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>>> ---
>>>>    .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
>>>>    drivers/gpu/drm/i915/i915_hwmon.c             | 48 +++++++++++++++++--
>>>>    2 files changed, 51 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
>>>> index 2d6a472eef885..edd94a44b4570 100644
>>>> --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
>>>> +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
>>>> @@ -18,6 +18,13 @@ Description:	RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
>>>> 			Only supported for particular Intel i915 graphics
>>>> platforms.
>>>>    +What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_enable
>>>
>>> This is not a standard hwmon attribute. The standard attribute would be
>>> power1_enable.
>>>
>>> So from hwmon perspective this is a NACK.
>>
>> Thanks for the feedback. I did consider power1_enable but decided to go
>> with the power1_max_enable custom attribute. Documentation for
>> power1_enable says it is to "Enable or disable the sensors" but in our case
>> we are not enabling/disabling sensors (which we don't have any ability to,
>> neither do we expose any power measurements, only energy from which power
>> can be derived) but enabling/disabling a "power limit" (a limit beyond
>> which HW takes steps to limit power).
> 
> Hi Guenter,
> 
> are you okay with this explanation to release the previous 'nack'?
> 

Not really. My suggested solution would have been to use a value of '0'
to indicate 'disabled' and document it accordingly.

> For me it looks like this case really doesn't fit in the standard ones.
> 
> But also this made me wonder what are the rules for non-standard cases?
> 
> I couldn't find any clear guidelines in here:
> https://docs.kernel.org/hwmon/hwmon-kernel-api.html#driver-provided-sysfs-attributes
> 
> We are seeing drivers around to freely use non-standard hwmon.

Yes, sure, freely. You conveniently ignore

Do not create non-standard attributes unless really needed. If you have to use
non-standard attributes, or you believe you do, discuss it on the mailing list
first. Either case, provide a detailed explanation why you need the non-standard
attribute(s). Standard attributes are specified in Naming and data format
standards for sysfs files.

from Documentation/hwmon/submitting-patches.rst.

> Are we free to add non standard ones as long if doesn't fit in the defined
> standards, or should we really limit the use and do our own thing on our own?
> 

> I mean, for the new Xe driver I was considering to standardize everything
> related to freq and power on top of the hwmon instead of separated sysfs
> files. But this would mean a lot of non-standard stuff on top of a few
> standard hwmon stuff. But I will hold this plan if you tell me that we
> should avoid and limit the non-standard cases.
> 

Oh, I really don't want to keep arguing, especially after your "freely"
above. Do whatever you want, just keep it out of drivers/hwmon.

Guenter
Rodrigo Vivi Feb. 16, 2023, 8:13 p.m. UTC | #5
On Thu, Feb 16, 2023 at 11:25:50AM -0800, Guenter Roeck wrote:
> On 2/16/23 10:57, Rodrigo Vivi wrote:
> > On Tue, Feb 14, 2023 at 07:11:16PM -0800, Dixit, Ashutosh wrote:
> > > On Mon, 13 Feb 2023 22:16:44 -0800, Guenter Roeck wrote:
> > > > 
> > > 
> > > Hi Guenter,
> > > 
> > > > On 2/13/23 21:33, Ashutosh Dixit wrote:
> > > > > On ATSM the PL1 power limit is disabled at power up. The previous uapi
> > > > > assumed that the PL1 limit is always enabled and therefore did not have a
> > > > > notion of a disabled PL1 limit. This results in erroneous PL1 limit values
> > > > > when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
> > > > > limit is shown as 0 which means a low PL1 limit whereas the limit being
> > > > > disabled actually implies a high effective PL1 limit value.
> > > > > 
> > > > > To get round this problem, expose power1_max_enable as a custom hwmon
> > > > > attribute. power1_max_enable can be used in conjunction with power1_max to
> > > > > interpret power1_max (PL1 limit) values correctly. It can also be used to
> > > > > enable/disable the PL1 power limit.
> > > > > 
> > > > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > > > > ---
> > > > >    .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
> > > > >    drivers/gpu/drm/i915/i915_hwmon.c             | 48 +++++++++++++++++--
> > > > >    2 files changed, 51 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > > > > index 2d6a472eef885..edd94a44b4570 100644
> > > > > --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > > > > +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > > > > @@ -18,6 +18,13 @@ Description:	RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
> > > > > 			Only supported for particular Intel i915 graphics
> > > > > platforms.
> > > > >    +What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_enable
> > > > 
> > > > This is not a standard hwmon attribute. The standard attribute would be
> > > > power1_enable.
> > > > 
> > > > So from hwmon perspective this is a NACK.
> > > 
> > > Thanks for the feedback. I did consider power1_enable but decided to go
> > > with the power1_max_enable custom attribute. Documentation for
> > > power1_enable says it is to "Enable or disable the sensors" but in our case
> > > we are not enabling/disabling sensors (which we don't have any ability to,
> > > neither do we expose any power measurements, only energy from which power
> > > can be derived) but enabling/disabling a "power limit" (a limit beyond
> > > which HW takes steps to limit power).
> > 
> > Hi Guenter,
> > 
> > are you okay with this explanation to release the previous 'nack'?
> > 
> 
> Not really. My suggested solution would have been to use a value of '0'
> to indicate 'disabled' and document it accordingly.
> 
> > For me it looks like this case really doesn't fit in the standard ones.
> > 
> > But also this made me wonder what are the rules for non-standard cases?
> > 
> > I couldn't find any clear guidelines in here:
> > https://docs.kernel.org/hwmon/hwmon-kernel-api.html#driver-provided-sysfs-attributes
> > 
> > We are seeing drivers around to freely use non-standard hwmon.
> 
> Yes, sure, freely. You conveniently ignore
> 
> Do not create non-standard attributes unless really needed. If you have to use
> non-standard attributes, or you believe you do, discuss it on the mailing list
> first. Either case, provide a detailed explanation why you need the non-standard
> attribute(s). Standard attributes are specified in Naming and data format
> standards for sysfs files.
> 
> from Documentation/hwmon/submitting-patches.rst.

I'm sorry for having missed this part. It is not that I ignored it, I
hadn't opened it because the title is on how to get patches
"accepted into the hwmon subsystem".

I was only reading the docs related to using hwmon in drivers; I was
not yet at the point where I thought this case was generic enough
to get it *into* the hwmon subsystem.

> 
> > Are we free to add non standard ones as long if doesn't fit in the defined
> > standards, or should we really limit the use and do our own thing on our own?
> > 
> 
> > I mean, for the new Xe driver I was considering to standardize everything
> > related to freq and power on top of the hwmon instead of separated sysfs
> > files. But this would mean a lot of non-standard stuff on top of a few
> > standard hwmon stuff. But I will hold this plan if you tell me that we
> > should avoid and limit the non-standard cases.
> > 
> 
> Oh, I really don't want to keep arguing, especially after your "freely"
> above. Do whatever you want, just keep it out of drivers/hwmon.

For the record, I am also not arguing. I'm just trying to understand the
rules. I believe hwmon is such a good infra, and based on the basic docs
I had the impression that adding non-standard attributes was allowed
and desirable for non-standard cases, and that contributions of new
standard attributes to hwmon would only come for things that more devices
really appear to have in common.

> 
> Guenter
>
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 2d6a472eef885..edd94a44b4570 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -18,6 +18,13 @@  Description:	RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
 
 		Only supported for particular Intel i915 graphics platforms.
 
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_enable
+Date:		May 2023
+KernelVersion:	6.3
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RW. Enable/disable the PL1 power limit (power1_max).
+
+		Only supported for particular Intel i915 graphics platforms.
 What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
 Date:		February 2023
 KernelVersion:	6.2
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 7c20a6f47b92e..5665869d8602b 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -230,13 +230,52 @@  hwm_power1_max_interval_store(struct device *dev,
 					    PKG_PWR_LIM_1_TIME, rxy);
 	return count;
 }
+static SENSOR_DEVICE_ATTR_RW(power1_max_interval, hwm_power1_max_interval, 0);
 
-static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
-			  hwm_power1_max_interval_show,
-			  hwm_power1_max_interval_store, 0);
+static ssize_t
+hwm_power1_max_enable_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+	intel_wakeref_t wakeref;
+	u32 r;
+
+	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
+
+	return sysfs_emit(buf, "%u\n", !!(r & PKG_PWR_LIM_1_EN));
+}
+
+static ssize_t
+hwm_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+	intel_wakeref_t wakeref;
+	u32 en, r;
+	bool _en;
+	int ret;
+
+	ret = kstrtobool(buf, &_en);
+	if (ret)
+		return ret;
+
+	en = REG_FIELD_PREP(PKG_PWR_LIM_1_EN, _en);
+	hwm_locked_with_pm_intel_uncore_rmw(ddat, ddat->hwmon->rg.pkg_rapl_limit,
+					    PKG_PWR_LIM_1_EN, en);
+
+	/* Verify, because PL1 limit cannot be disabled on all platforms */
+	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+		r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit);
+	if ((r & PKG_PWR_LIM_1_EN) != en)
+		return -EPERM;
+
+	return count;
+}
+static SENSOR_DEVICE_ATTR_RW(power1_max_enable, hwm_power1_max_enable, 0);
 
 static struct attribute *hwm_attributes[] = {
 	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
 	NULL
 };
 
@@ -247,7 +286,8 @@  static umode_t hwm_attributes_visible(struct kobject *kobj,
 	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
 	struct i915_hwmon *hwmon = ddat->hwmon;
 
-	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr ||
+	    attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
 		return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0;
 
 	return 0;