diff mbox series

[v2,1/1] drm/i915/dg1: Add HWMON power sensor support

Message ID 20210413212203.793-2-dale.b.stimson@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/dg1: Add HWMON power sensor support | expand

Commit Message

Stimson, Dale B April 13, 2021, 9:22 p.m. UTC
As part of the System Management Interface (SMI), use the HWMON
subsystem to display power utilization.

The following standard HWMON power sensors are currently supported
(and appropriately scaled):
  /sys/class/drm/card0/device/hwmon/hwmon<i>
	- energy1_input
	- power1_cap
	- power1_max

Some non-standard HWMON power information is also provided, such as
enable bits and intervals.

Signed-off-by: Dale B Stimson <dale.b.stimson@intel.com>
---
 drivers/gpu/drm/i915/Kconfig      |   1 +
 drivers/gpu/drm/i915/Makefile     |   1 +
 drivers/gpu/drm/i915/i915_drv.c   |   9 +
 drivers/gpu/drm/i915/i915_drv.h   |   3 +
 drivers/gpu/drm/i915/i915_hwmon.c | 788 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_hwmon.h |  41 ++
 drivers/gpu/drm/i915/i915_reg.h   |  53 ++
 7 files changed, 896 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c
 create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h

Comments

Jani Nikula April 21, 2021, 3:03 p.m. UTC | #1
On Tue, 13 Apr 2021, Dale B Stimson <dale.b.stimson@intel.com> wrote:
> As part of the System Management Interface (SMI), use the HWMON
> subsystem to display power utilization.
>
> The following standard HWMON power sensors are currently supported
> (and appropriately scaled):
>   /sys/class/drm/card0/device/hwmon/hwmon<i>
> 	- energy1_input
> 	- power1_cap
> 	- power1_max
>
> Some non-standard HWMON power information is also provided, such as
> enable bits and intervals.
>
> Signed-off-by: Dale B Stimson <dale.b.stimson@intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig      |   1 +
>  drivers/gpu/drm/i915/Makefile     |   1 +
>  drivers/gpu/drm/i915/i915_drv.c   |   9 +
>  drivers/gpu/drm/i915/i915_drv.h   |   3 +
>  drivers/gpu/drm/i915/i915_hwmon.c | 788 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_hwmon.h |  41 ++
>  drivers/gpu/drm/i915/i915_reg.h   |  53 ++
>  7 files changed, 896 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c
>  create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 1e1cb245fca77..ec8d5a0d7ea96 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -14,6 +14,7 @@ config DRM_I915
>  	select DRM_MIPI_DSI
>  	select RELAY
>  	select IRQ_WORK
> +	select HWMON
>  	# i915 depends on ACPI_VIDEO when ACPI is enabled
>  	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
>  	select BACKLIGHT_CLASS_DEVICE if ACPI
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index d0d936d9137bc..e213e2b129e20 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -37,6 +37,7 @@ i915-y += i915_drv.o \
>  	  i915_config.o \
>  	  i915_irq.o \
>  	  i915_getparam.o \
> +	  i915_hwmon.o \
>  	  i915_mitigations.o \
>  	  i915_params.o \
>  	  i915_pci.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 305557e1942aa..84c7de3b34c7d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -69,6 +69,7 @@
>  
>  #include "i915_debugfs.h"
>  #include "i915_drv.h"
> +#include "i915_hwmon.h"
>  #include "i915_ioc32.h"
>  #include "i915_irq.h"
>  #include "i915_memcpy.h"
> @@ -675,6 +676,10 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
>  	i915_debugfs_register(dev_priv);
>  	i915_setup_sysfs(dev_priv);
>  
> +	/* Register with hwmon */
> +	if (i915_hwmon_init(&dev_priv->drm))

Please pass in i915, not struct drm_device.

This is i915_driver_register. Almost all functions being called here have
_register in their names. Why not this one?

> +		drm_err(&dev_priv->drm, "Failed to register driver hwmon!\n");

Not sure we want this error message at this level.

> +
>  	/* Depends on sysfs having been initialized */
>  	i915_perf_register(dev_priv);
>  
> @@ -709,9 +714,13 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
>  	intel_gt_driver_unregister(&dev_priv->gt);
>  
>  	i915_perf_unregister(dev_priv);
> +
> +	i915_hwmon_fini(&dev_priv->drm);
> +

Naming, again _unregister in most places.

>  	i915_pmu_unregister(dev_priv);
>  
>  	i915_teardown_sysfs(dev_priv);
> +

Stray newline.

>  	drm_dev_unplug(&dev_priv->drm);
>  
>  	i915_gem_driver_unregister(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 69e43bf91a153..7e9b452c77e2b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -61,6 +61,7 @@
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> +#include "i915_hwmon.h"
>  #include "i915_params.h"
>  #include "i915_reg.h"
>  #include "i915_utils.h"
> @@ -1109,6 +1110,8 @@ struct drm_i915_private {
>  
>  	struct i915_perf perf;
>  
> +	struct i915_hwmon hwmon;
> +
>  	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>  	struct intel_gt gt;
>  
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> new file mode 100644
> index 0000000000000..ab8f32f7ed1de
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -0,0 +1,788 @@
> +// SPDX-License-Identifier: MIT
> +

Superfluous newline.

> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +/*
> + * Power-related hwmon entries.
> + */
> +
> +#include <linux/hwmon.h>
> +#include <linux/hwmon-sysfs.h>
> +#include <linux/types.h>
> +
> +#include "i915_drv.h"
> +#include "gt/intel_gt.h"
> +#include "i915_hwmon.h"
> +
> +/*
> + * SF_* - scale factors for particular quantities.
> + * The hwmon standard says that quantities of the given types are specified
> + * in the given units:
> + * - time   - milliseconds
> + * - power  - microwatts
> + * - energy - microjoules
> + */
> +
> +#define SF_TIME		   1000
> +#define SF_POWER	1000000
> +#define SF_ENERGY	1000000
> +
> +static void
> +_locked_with_pm_intel_uncore_rmw(struct intel_uncore *uncore,
> +				 i915_reg_t reg, u32 clear, u32 set)
> +{
> +	struct drm_i915_private *i915 = uncore->i915;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	intel_wakeref_t wakeref;
> +
> +	mutex_lock(&hwmon->hwmon_lock);
> +
> +	with_intel_runtime_pm(uncore->rpm, wakeref)
> +		intel_uncore_rmw(uncore, reg, clear, set);
> +
> +	mutex_unlock(&hwmon->hwmon_lock);
> +}
> +
> +/*
> + * _field_read_and_scale()

Unnecessary if this isn't kernel-doc, and this need not be kernel-doc.

> + * Return type of u64 allows for the case where the scaling might cause a
> + * result exceeding 32 bits.
> + */
> +static __always_inline u64

Why __always_inline? Why not let the compiler decide what makes sense?

> +_field_read_and_scale(struct intel_uncore *uncore, i915_reg_t rgadr,
> +		      u32 field_msk, int nshift, unsigned int scale_factor)
> +{
> +	intel_wakeref_t wakeref;
> +	u32 reg_value;
> +	u64 scaled_val;
> +
> +	with_intel_runtime_pm(uncore->rpm, wakeref)
> +		reg_value = intel_uncore_read(uncore, rgadr);
> +
> +	reg_value = le32_get_bits(cpu_to_le32(reg_value), field_msk);
> +	scaled_val = mul_u32_u32(scale_factor, reg_value);
> +
> +	/* Shift, rounding to nearest */
> +	if (nshift > 0)
> +		scaled_val = (scaled_val + (1 << (nshift - 1))) >> nshift;
> +
> +	return scaled_val;
> +}
> +
> +/*
> + * _field_read64_and_scale() - read a 64-bit register and scale.

Ditto for kernel-doc style.

> + */
> +static __always_inline u64

Ditto for __always_inline.

> +_field_read64_and_scale(struct intel_uncore *uncore, i915_reg_t rgadr,
> +			u64 field_msk, int nshift, unsigned int scale_factor)
> +{
> +	intel_wakeref_t wakeref;
> +	u64 reg_value;
> +	u64 scaled_val;
> +
> +	with_intel_runtime_pm(uncore->rpm, wakeref)
> +		reg_value = intel_uncore_read64(uncore, rgadr);
> +
> +	reg_value = le64_get_bits(cpu_to_le64(reg_value), field_msk);
> +	scaled_val = scale_factor * reg_value;
> +
> +	/* Shift, rounding to nearest */
> +	if (nshift > 0)
> +		scaled_val = (scaled_val + (1 << (nshift - 1))) >> nshift;
> +
> +	return scaled_val;
> +}
> +
> +/*
> + * _field_scale_and_write()
> + */
> +static __always_inline void
> +_field_scale_and_write(struct intel_uncore *uncore,
> +		       i915_reg_t rgadr,
> +		       u32 field_msk, int nshift,
> +		       unsigned int scale_factor, long lval)
> +{
> +	u32 nval;
> +	u32 bits_to_clear;
> +	u32 bits_to_set;
> +
> +	/* Computation in 64-bits to avoid overflow. Round to nearest. */
> +	nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
> +
> +	bits_to_clear = field_msk;
> +	bits_to_set = le32_to_cpu(le32_encode_bits(nval, field_msk));
> +
> +	_locked_with_pm_intel_uncore_rmw(uncore, rgadr,
> +					 bits_to_clear, bits_to_set);
> +}
> +
> +/*
> + * i915_energy1_input_show - A custom function to obtain energy1_input.
> + * Use a custom function instead of the usual hwmon helpers in order to
> + * guarantee 64-bits of result to user-space.
> + * Units are microjoules.
> + *
> + * The underlying hardware register is 32-bits and is subject to overflow.
> + * This function compensates for overflow of the 32-bit register by detecting
> + * wrap-around and incrementing an overflow counter.
> + * This only works if the register is sampled often enough to avoid
> + * missing an instance of overflow - achieved either by repeated
> + * queries through the API, or via a possible timer (future - TBD) that
> + * ensures values are read often enough to catch all overflows.
> + *
> + * How long before overflow?  For example, with an example scaling bit
> + * shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and a power draw of
> + * 1000 watts, the 32-bit counter will overflow in approximately 4.36 minutes.
> + *
> + * Examples:
> + *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
> + * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
> + */
> +static ssize_t
> +i915_energy1_input_show(struct device *dev, struct device_attribute *attr,
> +			char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	int nshift = hwmon->scl_shift_energy;
> +	ssize_t ret;
> +	intel_wakeref_t wakeref;
> +	u32 reg_value;
> +	u64 vlo;
> +	u64 vhi;
> +
> +	mutex_lock(&hwmon->hwmon_lock);
> +
> +	with_intel_runtime_pm(uncore->rpm, wakeref)
> +		reg_value = intel_uncore_read(uncore,
> +					      hwmon->rg.reg_energy_status);
> +
> +	/*
> +	 * The u32 register concatenated with the u32 overflow counter
> +	 * gives an effective energy counter size of 64-bits.  However, the
> +	 * computations below are done modulo 2^96 to avoid overflow during
> +	 * scaling in the conversion to microjoules.
> +	 *
> +	 * The low-order 64-bits of the resulting quantity are returned to
> +	 * the caller in units of microjoules, encoded into a decimal string.
> +	 *
> +	 * For a power of 1000 watts, 64 bits in units of microjoules will
> +	 * overflow after 584 years.
> +	 */
> +
> +	if (hwmon->energy_counter_prev > reg_value)
> +		hwmon->energy_counter_overflow++;
> +
> +	hwmon->energy_counter_prev = reg_value;
> +
> +	/*
> +	 * 64-bit variables vlo and vhi are used for the scaling process.
> +	 * The 96-bit counter value is composed from the two 64-bit variables
> +	 * vhi and vlo thusly:  counter == (vhi << 32) + vlo.
> +	 * The 32-bits of overlap between the two variables is convenient for
> +	 * handling overflows out of vlo.
> +	 */
> +
> +	vlo = reg_value;
> +	vhi = hwmon->energy_counter_overflow;
> +
> +	mutex_unlock(&hwmon->hwmon_lock);
> +
> +	vlo = SF_ENERGY * vlo;
> +
> +	/* Prepare to round to nearest */
> +	if (nshift > 0)
> +		vlo += 1 << (nshift - 1);
> +
> +	/*
> +	 * Anything in the upper-32 bits of vlo gets added into vhi here,
> +	 * and then cleared from vlo.
> +	 */
> +	vhi = (SF_ENERGY * vhi) + (vlo >> 32);
> +	vlo &= 0xffffffffULL;
> +
> +	/*
> +	 * Apply the right shift.
> +	 * - vlo shifted by itself.
> +	 * - vlo receiving what's shifted out of vhi.
> +	 * - vhi shifted by itself
> +	 */
> +	vlo = vlo >> nshift;
> +	vlo |= (vhi << (32 - nshift)) & 0xffffffffULL;
> +	vhi = vhi >> nshift;
> +
> +	/* Combined to get a 64-bit result in vlo. */
> +	vlo |= (vhi << 32);
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%llu\n", vlo);

sysfs_emit() instead?

> +
> +	return ret;
> +}
> +
> +static ssize_t
> +i915_power1_max_enable_show(struct device *dev, struct device_attribute *attr,
> +			    char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	intel_wakeref_t wakeref;
> +	ssize_t ret;
> +	u32 reg_value;
> +	bool is_enabled;
> +
> +	with_intel_runtime_pm(uncore->rpm, wakeref)
> +		reg_value = intel_uncore_read(uncore,
> +					      i915->hwmon.rg.pkg_rapl_limit);
> +
> +	is_enabled = !!(reg_value & PKG_PWR_LIM_1_EN);
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", is_enabled);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +i915_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
> +			     const char *buf, size_t count)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +	u32 val;
> +	u32 bits_to_clear;
> +	u32 bits_to_set;
> +
> +	ret = kstrtou32(buf, 0, &val);
> +	if (ret)
> +		return ret;
> +
> +	bits_to_clear = PKG_PWR_LIM_1_EN;
> +	if (!val)
> +		bits_to_set = 0;
> +	else
> +		bits_to_set = PKG_PWR_LIM_1_EN;
> +
> +	_locked_with_pm_intel_uncore_rmw(uncore, hwmon->rg.pkg_rapl_limit,
> +					 bits_to_clear, bits_to_set);
> +
> +	return count;
> +}
> +
> +static ssize_t
> +i915_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
> +			      char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +	u64 ullval;
> +
> +	ullval = _field_read_and_scale(uncore, hwmon->rg.pkg_rapl_limit,
> +				       PKG_PWR_LIM_1_TIME,
> +				       hwmon->scl_shift_time, SF_TIME);
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%llu\n", ullval);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +i915_power1_max_interval_store(struct device *dev,
> +			       struct device_attribute *attr,
> +			       const char *buf, size_t count)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +	long val;
> +
> +	ret = kstrtoul(buf, 0, &val);
> +	if (ret)
> +		return ret;
> +
> +	_field_scale_and_write(uncore, hwmon->rg.pkg_rapl_limit,
> +			       PKG_PWR_LIM_2_TIME,
> +			       hwmon->scl_shift_time, SF_TIME, val);
> +
> +	return count;
> +}
> +
> +static ssize_t
> +i915_power1_cap_enable_show(struct device *dev, struct device_attribute *attr,
> +			    char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	intel_wakeref_t wakeref;
> +	ssize_t ret;
> +	u32 reg_value;
> +	bool is_enabled;
> +
> +	with_intel_runtime_pm(uncore->rpm, wakeref)
> +		reg_value = intel_uncore_read(uncore,
> +					      hwmon->rg.pkg_rapl_limit_udw);
> +
> +	is_enabled = !!(reg_value & PKG_PWR_LIM_2_EN);
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", is_enabled);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +i915_power1_cap_enable_store(struct device *dev, struct device_attribute *attr,
> +			     const char *buf, size_t count)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +	u32 val;
> +	u32 bits_to_clear;
> +	u32 bits_to_set;
> +
> +	ret = kstrtou32(buf, 0, &val);
> +	if (ret)
> +		return ret;
> +
> +	bits_to_clear = PKG_PWR_LIM_2_EN;
> +	if (!val)
> +		bits_to_set = 0;
> +	else
> +		bits_to_set = PKG_PWR_LIM_2_EN;
> +
> +	_locked_with_pm_intel_uncore_rmw(uncore, hwmon->rg.pkg_rapl_limit_udw,
> +					 bits_to_clear, bits_to_set);
> +
> +	return count;
> +}
> +
> +static ssize_t
> +i915_power_default_limit_show(struct device *dev, struct device_attribute *attr,
> +			      char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", hwmon->power_max_initial_value);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +i915_power_min_limit_show(struct device *dev, struct device_attribute *attr,
> +			  char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +	u32 uval;
> +
> +	/*
> +	 * This is a 64-bit register but the individual fields are under 32 bits
> +	 * in size even after scaling.
> +	 * The UAPI specifies a size of 32 bits.
> +	 * The UAPI specifies that 0 should be returned if unsupported.
> +	 * So, using u32 and %u is sufficient.
> +	 */
> +	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
> +		uval = (u32)_field_read64_and_scale(uncore,
> +						    hwmon->rg.pkg_power_sku,
> +						    PKG_MIN_PWR,
> +						    hwmon->scl_shift_power,
> +						    SF_POWER);
> +	else
> +		uval = 0;
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", uval);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +i915_power_max_limit_show(struct device *dev, struct device_attribute *attr,
> +			  char *buf)
> +{
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	ssize_t ret;
> +	u32 uval;
> +
> +	/*
> +	 * This is a 64-bit register but the individual fields are under 32 bits
> +	 * in size even after scaling.
> +	 * The UAPI specifies a size of 32 bits.
> +	 * The UAPI specifies that UINT_MAX should be returned if unsupported.
> +	 * So, using u32 and %u is sufficient.
> +	 */
> +	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
> +		uval = (u32)_field_read64_and_scale(uncore,
> +						    hwmon->rg.pkg_power_sku,
> +						    PKG_MAX_PWR,
> +						    hwmon->scl_shift_power,
> +						    SF_POWER);
> +	else
> +		uval = UINT_MAX;
> +
> +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", uval);
> +
> +	return ret;
> +}
> +
> +static SENSOR_DEVICE_ATTR(power1_max_enable, 0664,
> +			  i915_power1_max_enable_show,
> +			  i915_power1_max_enable_store, 0);
> +static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> +			  i915_power1_max_interval_show,
> +			  i915_power1_max_interval_store, 0);
> +static SENSOR_DEVICE_ATTR(power1_cap_enable, 0664,
> +			  i915_power1_cap_enable_show,
> +			  i915_power1_cap_enable_store, 0);
> +static SENSOR_DEVICE_ATTR(power_default_limit, 0444,
> +			  i915_power_default_limit_show, NULL, 0);
> +static SENSOR_DEVICE_ATTR(power_min_limit, 0444,
> +			  i915_power_min_limit_show, NULL, 0);
> +static SENSOR_DEVICE_ATTR(power_max_limit, 0444,
> +			  i915_power_max_limit_show, NULL, 0);
> +static SENSOR_DEVICE_ATTR(energy1_input, 0444,
> +			  i915_energy1_input_show, NULL, 0);
> +
> +static struct attribute *hwmon_attributes[] = {
> +	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
> +	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
> +	&sensor_dev_attr_power1_cap_enable.dev_attr.attr,
> +	&sensor_dev_attr_power_default_limit.dev_attr.attr,
> +	&sensor_dev_attr_power_min_limit.dev_attr.attr,
> +	&sensor_dev_attr_power_max_limit.dev_attr.attr,
> +	&sensor_dev_attr_energy1_input.dev_attr.attr,
> +	NULL
> +};
> +
> +static umode_t hwmon_attributes_visible(struct kobject *kobj,
> +					struct attribute *attr, int index)
> +{
> +	struct device *dev = kobj_to_dev(kobj);
> +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	i915_reg_t rgadr;
> +
> +	if (attr == &sensor_dev_attr_energy1_input.dev_attr.attr)
> +		rgadr = hwmon->rg.reg_energy_status;
> +	else if (attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
> +		rgadr = hwmon->rg.pkg_rapl_limit;
> +	else if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
> +		rgadr = hwmon->rg.pkg_rapl_limit;
> +	else if (attr == &sensor_dev_attr_power1_cap_enable.dev_attr.attr)
> +		rgadr = hwmon->rg.pkg_rapl_limit_udw;
> +	else if (attr == &sensor_dev_attr_power_default_limit.dev_attr.attr)
> +		rgadr = hwmon->rg.pkg_rapl_limit;
> +	else if (attr == &sensor_dev_attr_power_min_limit.dev_attr.attr)
> +		return attr->mode;
> +	else if (attr == &sensor_dev_attr_power_max_limit.dev_attr.attr)
> +		return attr->mode;
> +	else
> +		return 0;
> +
> +	if (!i915_mmio_reg_valid(rgadr))
> +		return 0;
> +
> +	return attr->mode;
> +}
> +
> +static const struct attribute_group hwmon_attrgroup = {
> +	.attrs = hwmon_attributes,
> +	.is_visible = hwmon_attributes_visible,
> +};
> +
> +static const struct attribute_group *hwmon_groups[] = {
> +	&hwmon_attrgroup,
> +	NULL
> +};
> +
> +/*
> + * HWMON SENSOR TYPE = hwmon_power
> + *  - Sustained Power (power1_max)
> + *  - Burst power     (power1_cap)
> + *  - Peak power      (power1_crit)
> + */
> +static const u32 i915_config_power[] = {
> +	HWMON_P_CAP | HWMON_P_MAX,
> +	0
> +};
> +
> +static const struct hwmon_channel_info i915_power = {
> +	.type = hwmon_power,
> +	.config = i915_config_power,
> +};
> +
> +static const struct hwmon_channel_info *i915_info[] = {
> +	&i915_power,
> +	NULL
> +};
> +
> +static umode_t
> +i915_power_is_visible(const struct drm_i915_private *i915, u32 attr, int chan)
> +{
> +	i915_reg_t rgadr;
> +
> +	switch (attr) {
> +	case hwmon_power_max:
> +		rgadr = i915->hwmon.rg.pkg_rapl_limit;
> +		break;
> +	case hwmon_power_cap:
> +		rgadr = i915->hwmon.rg.pkg_rapl_limit_udw;
> +		break;
> +	default:
> +		return 0;
> +	}
> +
> +	if (!i915_mmio_reg_valid(rgadr))
> +		return 0;
> +
> +	return 0664;
> +}
> +
> +static int
> +i915_power_read(struct drm_i915_private *i915, u32 attr, int chan, long *val)
> +{
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	int ret = 0;
> +
> +	switch (attr) {
> +	case hwmon_power_max:
> +		*val = (long)_field_read_and_scale(uncore,
> +						   hwmon->rg.pkg_rapl_limit,
> +						   PKG_PWR_LIM_1,
> +						   hwmon->scl_shift_power,
> +						   SF_POWER);
> +		break;
> +	case hwmon_power_cap:
> +		*val = (long)_field_read_and_scale(uncore,
> +						   hwmon->rg.pkg_rapl_limit_udw,
> +						   PKG_PWR_LIM_2,
> +						   hwmon->scl_shift_power,
> +						   SF_POWER);
> +		break;
> +	default:
> +		ret = -EOPNOTSUPP;
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +i915_power_write(struct drm_i915_private *i915, u32 attr, int chan, long val)
> +{
> +	struct intel_uncore *uncore = &i915->uncore;
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	int ret = 0;
> +
> +	switch (attr) {
> +	case hwmon_power_max:
> +		_field_scale_and_write(uncore,
> +				       hwmon->rg.pkg_rapl_limit,
> +				       PKG_PWR_LIM_1,
> +				       hwmon->scl_shift_power,
> +				       SF_POWER, val);
> +		break;
> +	case hwmon_power_cap:
> +		_field_scale_and_write(uncore,
> +				       hwmon->rg.pkg_rapl_limit_udw,
> +				       PKG_PWR_LIM_2,
> +				       hwmon->scl_shift_power,
> +				       SF_POWER, val);
> +		break;
> +	default:
> +		ret = -EOPNOTSUPP;
> +	}
> +
> +	return ret;
> +}
> +
> +static umode_t
> +i915_is_visible(const void *data, enum hwmon_sensor_types type,
> +		u32 attr, int channel)
> +{
> +	struct drm_i915_private *i915 = (struct drm_i915_private *)data;
> +
> +	switch (type) {
> +	case hwmon_power:
> +		return i915_power_is_visible(i915, attr, channel);
> +	default:
> +		return 0;
> +	}
> +}
> +
> +static int
> +i915_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
> +	  int channel, long *val)
> +{
> +	struct drm_i915_private *i915 = kdev_to_i915(dev);
> +
> +	switch (type) {
> +	case hwmon_power:
> +		return i915_power_read(i915, attr, channel, val);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
> +static int
> +i915_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
> +	   int channel, long val)
> +{
> +	struct drm_i915_private *i915 = kdev_to_i915(dev);
> +
> +	switch (type) {
> +	case hwmon_power:
> +		return i915_power_write(i915, attr, channel, val);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
> +static const struct hwmon_ops i915_hwmon_ops = {
> +	.is_visible = i915_is_visible,
> +	.read = i915_read,
> +	.write = i915_write,
> +};
> +
> +static const struct hwmon_chip_info i915_chip_info = {
> +	.ops = &i915_hwmon_ops,
> +	.info = i915_info,
> +};
> +
> +static void
> +i915_hwmon_get_preregistration_info(struct drm_i915_private *i915)
> +{
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	struct intel_uncore *uncore = &i915->uncore;
> +	intel_wakeref_t wakeref;
> +	u32 val_sku_unit;
> +	__le32 le_sku_unit;
> +
> +	if (IS_DG1(i915)) {
> +		hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
> +		hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
> +		hwmon->rg.pkg_energy_status = PCU_PACKAGE_ENERGY_STATUS;
> +		hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
> +		hwmon->rg.pkg_rapl_limit_udw = PCU_PACKAGE_RAPL_LIMIT_UDW;
> +		hwmon->rg.plt_energy_status = PCU_PLATFORM_ENERGY_STATUS;
> +	} else {
> +		hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
> +		hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
> +		hwmon->rg.pkg_energy_status = INVALID_MMIO_REG;
> +		hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
> +		hwmon->rg.pkg_rapl_limit_udw = INVALID_MMIO_REG;
> +		hwmon->rg.plt_energy_status = INVALID_MMIO_REG;
> +	}
> +
> +	/*
> +	 * If a platform does not support *_PLATFORM_ENERGY_STATUS,
> +	 * try *PACKAGE_ENERGY_STATUS.
> +	 */
> +	if (i915_mmio_reg_valid(hwmon->rg.plt_energy_status))
> +		hwmon->rg.reg_energy_status = hwmon->rg.plt_energy_status;
> +	else
> +		hwmon->rg.reg_energy_status = hwmon->rg.pkg_energy_status;
> +
> +	wakeref = intel_runtime_pm_get(uncore->rpm);
> +
> +	/*
> +	 * The contents of register hwmon->rg.pkg_power_sku_unit do not change,
> +	 * so read it once and store the shift values.
> +	 */
> +	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
> +		val_sku_unit = intel_uncore_read(uncore,
> +						 hwmon->rg.pkg_power_sku_unit);
> +	else
> +		val_sku_unit = 0;
> +
> +	hwmon->energy_counter_overflow = 0;
> +
> +	if (i915_mmio_reg_valid(hwmon->rg.reg_energy_status))
> +		hwmon->energy_counter_prev =
> +			intel_uncore_read(uncore, hwmon->rg.reg_energy_status);
> +	else
> +		hwmon->energy_counter_prev = 0;
> +
> +	intel_runtime_pm_put(uncore->rpm, wakeref);
> +
> +	le_sku_unit = cpu_to_le32(val_sku_unit);
> +	hwmon->scl_shift_power = le32_get_bits(le_sku_unit, PKG_PWR_UNIT);
> +	hwmon->scl_shift_energy = le32_get_bits(le_sku_unit, PKG_ENERGY_UNIT);
> +	hwmon->scl_shift_time = le32_get_bits(le_sku_unit, PKG_TIME_UNIT);
> +
> +	/*
> +	 * There is no direct way to obtain the power default_limit.
> +	 * The best known workaround is to use the initial value of power1_max.
> +	 *
> +	 * The value of power1_max is reset to the default on reboot, but is
> +	 * not reset by a module unload/load sequence.  To allow proper
> +	 * functioning after a module reload, the value for power1_max is
> +	 * restored to its original value at module unload time in
> +	 * i915_hwmon_fini().
> +	 */
> +	hwmon->power_max_initial_value =
> +		(u32)_field_read_and_scale(uncore,
> +					   hwmon->rg.pkg_rapl_limit,
> +					   PKG_PWR_LIM_1,
> +					   hwmon->scl_shift_power, SF_POWER);
> +}
> +
> +int i915_hwmon_init(struct drm_device *drm_dev)
> +{
> +	struct drm_i915_private *i915 = to_i915(drm_dev);
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +	struct device *hwmon_dev;
> +
> +	mutex_init(&hwmon->hwmon_lock);
> +
> +	i915_hwmon_get_preregistration_info(i915);
> +
> +	hwmon_dev = hwmon_device_register_with_info(drm_dev->dev, "i915",
> +						    drm_dev,
> +						    &i915_chip_info,
> +						    hwmon_groups);
> +
> +	if (IS_ERR(hwmon_dev)) {
> +		mutex_destroy(&hwmon->hwmon_lock);
> +		return PTR_ERR(hwmon_dev);
> +	}
> +
> +	hwmon->dev = hwmon_dev;
> +
> +	return 0;
> +}
> +
> +void i915_hwmon_fini(struct drm_device *drm_dev)
> +{
> +	struct drm_i915_private *i915 = to_i915(drm_dev);
> +	struct i915_hwmon *hwmon = &i915->hwmon;
> +
> +	if (hwmon->power_max_initial_value) {
> +		/* Restore power1_max. */
> +		_field_scale_and_write(&i915->uncore, hwmon->rg.pkg_rapl_limit,
> +				       PKG_PWR_LIM_1, hwmon->scl_shift_power,
> +				       SF_POWER,
> +				       hwmon->power_max_initial_value);
> +	}
> +
> +	if (hwmon->dev)
> +		hwmon_device_unregister(hwmon->dev);
> +
> +	mutex_destroy(&hwmon->hwmon_lock);
> +
> +	memset(hwmon, 0, sizeof(*hwmon));
> +}
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
> new file mode 100644
> index 0000000000000..0be919f0a463d
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_hwmon.h
> @@ -0,0 +1,41 @@
> +/* SPDX-License-Identifier: MIT */
> +
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#ifndef __INTEL_HWMON_H__
> +#define __INTEL_HWMON_H__
> +
> +#include <drm/drm_device.h>

A forward declaration should be enough. Need <linux/types.h> though.

> +#include "i915_reg.h"
> +
> +struct i915_hwmon_reg {
> +	i915_reg_t pkg_power_sku_unit;
> +	i915_reg_t pkg_power_sku;
> +	i915_reg_t pkg_energy_status;
> +	i915_reg_t pkg_rapl_limit;
> +	i915_reg_t pkg_rapl_limit_udw;
> +	i915_reg_t plt_energy_status;
> +	i915_reg_t reg_energy_status;
> +};
> +
> +struct i915_hwmon {
> +	struct device *dev;
> +	struct mutex hwmon_lock;	/* counter overflow logic and rmw */
> +
> +	struct i915_hwmon_reg rg;
> +
> +	u32 energy_counter_overflow;
> +	u32 energy_counter_prev;
> +	u32 power_max_initial_value;
> +
> +	int scl_shift_power;
> +	int scl_shift_energy;
> +	int scl_shift_time;
> +};
> +
> +int i915_hwmon_init(struct drm_device *drm_dev);
> +void i915_hwmon_fini(struct drm_device *drm_dev);
> +
> +#endif
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f80d656331f42..62fccf71ddad6 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -4071,6 +4071,59 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>  #define BXT_RP_STATE_CAP        _MMIO(0x138170)
>  #define GEN9_RP_STATE_LIMITS	_MMIO(0x138148)
>  
> +/* DG1 */
> +
> +/* based on MCHBAR_MIRROR_BASE_SNB == 0x140000 */
> +#define PCU_PACKAGE_POWER_SKU_UNIT	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5938)
> +#define PCU_PACKAGE_ENERGY_STATUS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x593c)
> +#define PCU_PACKAGE_RAPL_LIMIT		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
> +#define PCU_PACKAGE_RAPL_LIMIT_UDW	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a4)
> +#define PCU_PACKAGE_POWER_SKU		INVALID_MMIO_REG
> +#define PCU_PLATFORM_ENERGY_STATUS	INVALID_MMIO_REG
> +
> +/* Fields for *_PACKAGE_RAPL_LIMIT: */
> +#define   PKG_PWR_LIM_1			REG_GENMASK(14, 0)
> +#define   PKG_PWR_LIM_1_EN		REG_BIT(15)
> +#define   PKG_PWR_LIM_1_TIME		REG_GENMASK(23, 17)
> +
> +/*
> + * Fields for *_PACKAGE_RAPL_LIMIT_UDW:
> + * In docs, these fields may be defined relative to the entire 64-bit
> + * register, but here they are defined relative to the 32-bit boundary.
> + */
> +#define   PKG_PWR_LIM_2			REG_GENMASK(14, 0)	// 46:32
> +#define   PKG_PWR_LIM_2_EN		REG_BIT(15)		// 47:47
> +#define   PKG_PWR_LIM_2_TIME		REG_GENMASK(23, 17)	// 55:49
> +
> +/*
> + * *_PACKAGE_POWER_SKU_UNIT - fields specifying scaling for PCU quantities.
> + * - PKG_PWR_UNIT - Power Units used for power control registers. The
> + *   actual unit value is calculated by 1 W / Power(2,PKG_PWR_UNIT).
> + * - PKG_ENERGY_UNIT - Energy Units used for power control registers. The
> + *   actual unit value is calculated by 1 J / Power(2,PKG_ENERGY_UNIT).
> + * - PKG_TIME_UNIT - Time Units used for power control registers. The
> + *   actual unit value is calculated by 1 s / Power(2,PKG_TIME_UNIT).
> + */
> +#define   PKG_PWR_UNIT			REG_GENMASK(3, 0)
> +#define   PKG_ENERGY_UNIT		REG_GENMASK(12, 8)
> +#define   PKG_TIME_UNIT			REG_GENMASK(19, 16)
> +
> +/*
> + * *_PACKAGE_POWER_SKU - SKU power and timing parameters.
> + * Used herein as a 64-bit register.
> + * These masks are defined using GENMASK_ULL as REG_GENMASK is limited to u32
> + * and as GENMASK is "long" and therefore 32-bits on a 32-bit system.
> + * PKG_PKG_TDP, PKG_MIN_PWR, and PKG_MAX_PWR are scaled in the same way as
> + * PKG_PWR_LIM_*, above.
> + * PKG_MAX_WIN has sub-fields for x and y, and has the value 1.x * 2^y.
> + */
> +#define   PKG_PKG_TDP			GENMASK_ULL(14, 0)
> +#define   PKG_MIN_PWR			GENMASK_ULL(30, 16)
> +#define   PKG_MAX_PWR			GENMASK_ULL(46, 32)
> +#define   PKG_MAX_WIN			GENMASK_ULL(54, 48)
> +#define     PKG_MAX_WIN_Y		GENMASK_ULL(54, 53)
> +#define     PKG_MAX_WIN_X		GENMASK_ULL(52, 48)
> +
>  /*
>   * Logical Context regs
>   */
Stimson, Dale B May 14, 2021, 11:40 p.m. UTC | #2
On 2021-04-21 18:03:51, Jani Nikula wrote:
> On Tue, 13 Apr 2021, Dale B Stimson <dale.b.stimson@intel.com> wrote:
> > > As part of the System Management Interface (SMI), use the HWMON
> > subsystem to display power utilization.
> >
> > The following standard HWMON power sensors are currently supported
> > (and appropriately scaled):
> >   /sys/class/drm/card0/device/hwmon/hwmon<i>
> > 	- energy1_input
> > 	- power1_cap
> > 	- power1_max
> >
> > Some non-standard HWMON power information is also provided, such as
> > enable bits and intervals.
> >
> > Signed-off-by: Dale B Stimson <dale.b.stimson@intel.com>
> > ---
> >  drivers/gpu/drm/i915/Kconfig      |   1 +
> >  drivers/gpu/drm/i915/Makefile     |   1 +
> >  drivers/gpu/drm/i915/i915_drv.c   |   9 +
> >  drivers/gpu/drm/i915/i915_drv.h   |   3 +
> >  drivers/gpu/drm/i915/i915_hwmon.c | 788 ++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_hwmon.h |  41 ++
> >  drivers/gpu/drm/i915/i915_reg.h   |  53 ++
> >  7 files changed, 896 insertions(+)
> >  create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c
> >  create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h
> >
> > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> > index 1e1cb245fca77..ec8d5a0d7ea96 100644
> > --- a/drivers/gpu/drm/i915/Kconfig
> > +++ b/drivers/gpu/drm/i915/Kconfig
> > @@ -14,6 +14,7 @@ config DRM_I915
> >  	select DRM_MIPI_DSI
> >  	select RELAY
> >  	select IRQ_WORK
> > +	select HWMON
> >  	# i915 depends on ACPI_VIDEO when ACPI is enabled
> >  	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
> >  	select BACKLIGHT_CLASS_DEVICE if ACPI
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index d0d936d9137bc..e213e2b129e20 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -37,6 +37,7 @@ i915-y += i915_drv.o \
> >  	  i915_config.o \
> >  	  i915_irq.o \
> >  	  i915_getparam.o \
> > +	  i915_hwmon.o \
> >  	  i915_mitigations.o \
> >  	  i915_params.o \
> >  	  i915_pci.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 305557e1942aa..84c7de3b34c7d 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -69,6 +69,7 @@
> >  
> >  #include "i915_debugfs.h"
> >  #include "i915_drv.h"
> > +#include "i915_hwmon.h"
> >  #include "i915_ioc32.h"
> >  #include "i915_irq.h"
> >  #include "i915_memcpy.h"
> > @@ -675,6 +676,10 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
> >  	i915_debugfs_register(dev_priv);
> >  	i915_setup_sysfs(dev_priv);
> >  
> > +	/* Register with hwmon */
> > +	if (i915_hwmon_init(&dev_priv->drm))
> 
> Please pass in i915, not struct drm_device.

Done.

> This is i915_driver_register. Almost all functions being called here have
> _register in them. Why not this one?

I have renamed the functions to i915_hwmon_register and
i915_hwmon_unregister.

> > +		drm_err(&dev_priv->drm, "Failed to register driver hwmon!\n");
> 
> Not sure we want this error message at this level.

I have removed this error message and changed i915_hwmon_register so it
returns void instead of int.

> 
> > +
> >  	/* Depends on sysfs having been initialized */
> >  	i915_perf_register(dev_priv);
> >  
> > @@ -709,9 +714,13 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
> >  	intel_gt_driver_unregister(&dev_priv->gt);
> >  
> >  	i915_perf_unregister(dev_priv);
> > +
> > +	i915_hwmon_fini(&dev_priv->drm);
> > +
> 
> Naming, again _unregister in most places.
Fixed.

> 
> >  	i915_pmu_unregister(dev_priv);
> >  
> >  	i915_teardown_sysfs(dev_priv);
> > +
> 
> Stray newline.
Fixed.

> 
> >  	drm_dev_unplug(&dev_priv->drm);
> >  
> >  	i915_gem_driver_unregister(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 69e43bf91a153..7e9b452c77e2b 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -61,6 +61,7 @@
> >  #include <drm/drm_connector.h>
> >  #include <drm/i915_mei_hdcp_interface.h>
> >  
> > +#include "i915_hwmon.h"
> >  #include "i915_params.h"
> >  #include "i915_reg.h"
> >  #include "i915_utils.h"
> > @@ -1109,6 +1110,8 @@ struct drm_i915_private {
> >  
> >  	struct i915_perf perf;
> >  
> > +	struct i915_hwmon hwmon;
> > +
> >  	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
> >  	struct intel_gt gt;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > new file mode 100644
> > index 0000000000000..ab8f32f7ed1de
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > @@ -0,0 +1,788 @@
> > +// SPDX-License-Identifier: MIT
> > +
> 
> Superfluous newline.
Fixed.
> 
> > +/*
> > + * Copyright © 2020 Intel Corporation
> > + */
> > +
> > +/*
> > + * Power-related hwmon entries.
> > + */
> > +
> > +#include <linux/hwmon.h>
> > +#include <linux/hwmon-sysfs.h>
> > +#include <linux/types.h>
> > +
> > +#include "i915_drv.h"
> > +#include "gt/intel_gt.h"
> > +#include "i915_hwmon.h"
> > +
> > +/*
> > + * SF_* - scale factors for particular quantities.
> > + * The hwmon standard says that quantities of the given types are specified
> > + * in the given units:
> > + * - time   - milliseconds
> > + * - power  - microwatts
> > + * - energy - microjoules
> > + */
> > +
> > +#define SF_TIME		   1000
> > +#define SF_POWER	1000000
> > +#define SF_ENERGY	1000000
> > +
> > +static void
> > +_locked_with_pm_intel_uncore_rmw(struct intel_uncore *uncore,
> > +				 i915_reg_t reg, u32 clear, u32 set)
> > +{
> > +	struct drm_i915_private *i915 = uncore->i915;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	intel_wakeref_t wakeref;
> > +
> > +	mutex_lock(&hwmon->hwmon_lock);
> > +
> > +	with_intel_runtime_pm(uncore->rpm, wakeref)
> > +		intel_uncore_rmw(uncore, reg, clear, set);
> > +
> > +	mutex_unlock(&hwmon->hwmon_lock);
> > +}
> > +
> > +/*
> > + * _field_read_and_scale()
> 
> Unnecessary if this isn't kernel-doc, and this need not be kernel-doc.

I assume your comment is about the inclusion of the function name on the
first line of the comment block and that you are not suggesting removal of
the pre-function comment block altogether, right?

I have removed the function name from the comment, which reads:

/*
 * This function's return type of u64 allows for the case where the scaling
 * of the 32-bit register value might cause a result to exceed 32 bits.
 */

That seems appropriate to me, and it's information that I want to convey.
What do you think?

> > + * Return type of u64 allows for the case where the scaling might cause a
> > + * result exceeding 32 bits.
> > + */
> > +static __always_inline u64
> 
> Why __always_inline? Why not let the compiler decide what makes sense?

Instigated by your comment, I have changed this.  First, a word as to why
it was the way it was:

This is the comment that I had written to explain the way that it was:

/*
 * This function is declared as __always_inline because it employs macro
 * le32_get_bits, which expects certain of its arguments to be constants.
 * The compiler would lose the "constant" property of those arguments if
 * the function is invoked as non-inline (which the compiler is free to do
 * if only declared "inline").
 */

The diagnostic when not inline:

In function ‘field_multiplier’,
    inlined from ‘le32_get_bits’ at ./include/linux/bitfield.h:154:1,
    inlined from ‘_field_read_and_scale’ at drivers/gpu/drm/i915/i915_hwmon.c:63:12:
./include/linux/bitfield.h:119:3: error: call to ‘__bad_mask’ declared with attribute error: bad bitfield mask
   __bad_mask();

Requiring "inline" is a hack.  Also, it depends on the compiler implementation.
Accordingly, I've implemented this in a different way.

A new parameter called "field_shift" has been added to each of those three
functions, avoiding the use of the le32* macros.  These functions now do
the mask extraction/insertion explicitly.

This approach requires knowing the number of bits by which the mask must
be shifted.  The shift value is now obtained using the same method as used
internally by the le32* macros in include/linux/bitfield.h (but invoked from
where the mask value is available as a constant, outside of the function call).

> > +_field_read_and_scale(struct intel_uncore *uncore, i915_reg_t rgadr,
> > +		      u32 field_msk, int nshift, unsigned int scale_factor)
> > +{
> > +	intel_wakeref_t wakeref;
> > +	u32 reg_value;
> > +	u64 scaled_val;
> > +
> > +	with_intel_runtime_pm(uncore->rpm, wakeref)
> > +		reg_value = intel_uncore_read(uncore, rgadr);
> > +
> > +	reg_value = le32_get_bits(cpu_to_le32(reg_value), field_msk);
> > +	scaled_val = mul_u32_u32(scale_factor, reg_value);
> > +
> > +	/* Shift, rounding to nearest */
> > +	if (nshift > 0)
> > +		scaled_val = (scaled_val + (1 << (nshift - 1))) >> nshift;
> > +
> > +	return scaled_val;
> > +}
> > +
> > +/*
> > + * _field_read64_and_scale() - read a 64-bit register and scale.
> 
> Ditto for kernel-doc style.
Fixed.

> 
> > + */
> > +static __always_inline u64
> 
> Ditto for __always_inline.
Fixed.
> 
> > +_field_read64_and_scale(struct intel_uncore *uncore, i915_reg_t rgadr,
> > +			u64 field_msk, int nshift, unsigned int scale_factor)
> > +{
> > +	intel_wakeref_t wakeref;
> > +	u64 reg_value;
> > +	u64 scaled_val;
> > +
> > +	with_intel_runtime_pm(uncore->rpm, wakeref)
> > +		reg_value = intel_uncore_read64(uncore, rgadr);
> > +
> > +	reg_value = le64_get_bits(cpu_to_le64(reg_value), field_msk);
> > +	scaled_val = scale_factor * reg_value;
> > +
> > +	/* Shift, rounding to nearest */
> > +	if (nshift > 0)
> > +		scaled_val = (scaled_val + (1 << (nshift - 1))) >> nshift;
> > +
> > +	return scaled_val;
> > +}
> > +
> > +/*
> > + * _field_scale_and_write()
> > + */
> > +static __always_inline void
> > +_field_scale_and_write(struct intel_uncore *uncore,
> > +		       i915_reg_t rgadr,
> > +		       u32 field_msk, int nshift,
> > +		       unsigned int scale_factor, long lval)
> > +{
> > +	u32 nval;
> > +	u32 bits_to_clear;
> > +	u32 bits_to_set;
> > +
> > +	/* Computation in 64-bits to avoid overflow. Round to nearest. */
> > +	nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
> > +
> > +	bits_to_clear = field_msk;
> > +	bits_to_set = le32_to_cpu(le32_encode_bits(nval, field_msk));
> > +
> > +	_locked_with_pm_intel_uncore_rmw(uncore, rgadr,
> > +					 bits_to_clear, bits_to_set);
> > +}
> > +
> > +/*
> > + * i915_energy1_input_show - A custom function to obtain energy1_input.
> > + * Use a custom function instead of the usual hwmon helpers in order to
> > + * guarantee 64-bits of result to user-space.
> > + * Units are microjoules.
> > + *
> > + * The underlying hardware register is 32-bits and is subject to overflow.
> > + * This function compensates for overflow of the 32-bit register by detecting
> > + * wrap-around and incrementing an overflow counter.
> > + * This only works if the register is sampled often enough to avoid
> > + * missing an instance of overflow - achieved either by repeated
> > + * queries through the API, or via a possible timer (future - TBD) that
> > + * ensures values are read often enough to catch all overflows.
> > + *
> > + * How long before overflow?  For example, with an example scaling bit
> > + * shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and a power draw of
> > + * 1000 watts, the 32-bit counter will overflow in approximately 4.36 minutes.
> > + *
> > + * Examples:
> > + *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
> > + * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
> > + */
> > +static ssize_t
> > +i915_energy1_input_show(struct device *dev, struct device_attribute *attr,
> > +			char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	int nshift = hwmon->scl_shift_energy;
> > +	ssize_t ret;
> > +	intel_wakeref_t wakeref;
> > +	u32 reg_value;
> > +	u64 vlo;
> > +	u64 vhi;
> > +
> > +	mutex_lock(&hwmon->hwmon_lock);
> > +
> > +	with_intel_runtime_pm(uncore->rpm, wakeref)
> > +		reg_value = intel_uncore_read(uncore,
> > +					      hwmon->rg.reg_energy_status);
> > +
> > +	/*
> > +	 * The u32 register concatenated with the u32 overflow counter
> > +	 * gives an effective energy counter size of 64-bits.  However, the
> > +	 * computations below are done modulo 2^96 to avoid overflow during
> > +	 * scaling in the conversion to microjoules.
> > +	 *
> > +	 * The low-order 64-bits of the resulting quantity are returned to
> > +	 * the caller in units of microjoules, encoded into a decimal string.
> > +	 *
> > +	 * For a power of 1000 watts, 64 bits in units of microjoules will
> > +	 * overflow after 584 years.
> > +	 */
> > +
> > +	if (hwmon->energy_counter_prev > reg_value)
> > +		hwmon->energy_counter_overflow++;
> > +
> > +	hwmon->energy_counter_prev = reg_value;
> > +
> > +	/*
> > +	 * 64-bit variables vlo and vhi are used for the scaling process.
> > +	 * The 96-bit counter value is composed from the two 64-bit variables
> > +	 * vhi and vlo thusly:  counter == (vhi << 32) + vlo.
> > +	 * The 32-bits of overlap between the two variables is convenient for
> > +	 * handling overflows out of vlo.
> > +	 */
> > +
> > +	vlo = reg_value;
> > +	vhi = hwmon->energy_counter_overflow;
> > +
> > +	mutex_unlock(&hwmon->hwmon_lock);
> > +
> > +	vlo = SF_ENERGY * vlo;
> > +
> > +	/* Prepare to round to nearest */
> > +	if (nshift > 0)
> > +		vlo += 1 << (nshift - 1);
> > +
> > +	/*
> > +	 * Anything in the upper-32 bits of vlo gets added into vhi here,
> > +	 * and then cleared from vlo.
> > +	 */
> > +	vhi = (SF_ENERGY * vhi) + (vlo >> 32);
> > +	vlo &= 0xffffffffULL;
> > +
> > +	/*
> > +	 * Apply the right shift.
> > +	 * - vlo shifted by itself.
> > +	 * - vlo receiving what's shifted out of vhi.
> > +	 * - vhi shifted by itself
> > +	 */
> > +	vlo = vlo >> nshift;
> > +	vlo |= (vhi << (32 - nshift)) & 0xffffffffULL;
> > +	vhi = vhi >> nshift;
> > +
> > +	/* Combined to get a 64-bit result in vlo. */
> > +	vlo |= (vhi << 32);
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%llu\n", vlo);
> 
> sysfs_emit() instead?

Yes.  At the time of writing, sysfs_emit (being relatively new) was not
available on the development branch being used.  Now it is.  The code has
been switched to sysfs_emit().

> 
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +i915_power1_max_enable_show(struct device *dev, struct device_attribute *attr,
> > +			    char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	intel_wakeref_t wakeref;
> > +	ssize_t ret;
> > +	u32 reg_value;
> > +	bool is_enabled;
> > +
> > +	with_intel_runtime_pm(uncore->rpm, wakeref)
> > +		reg_value = intel_uncore_read(uncore,
> > +					      i915->hwmon.rg.pkg_rapl_limit);
> > +
> > +	is_enabled = !!(reg_value & PKG_PWR_LIM_1_EN);
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", is_enabled);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +i915_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
> > +			     const char *buf, size_t count)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +	u32 val;
> > +	u32 bits_to_clear;
> > +	u32 bits_to_set;
> > +
> > +	ret = kstrtou32(buf, 0, &val);
> > +	if (ret)
> > +		return ret;
> > +
> > +	bits_to_clear = PKG_PWR_LIM_1_EN;
> > +	if (!val)
> > +		bits_to_set = 0;
> > +	else
> > +		bits_to_set = PKG_PWR_LIM_1_EN;
> > +
> > +	_locked_with_pm_intel_uncore_rmw(uncore, hwmon->rg.pkg_rapl_limit,
> > +					 bits_to_clear, bits_to_set);
> > +
> > +	return count;
> > +}
> > +
> > +static ssize_t
> > +i915_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
> > +			      char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +	u64 ullval;
> > +
> > +	ullval = _field_read_and_scale(uncore, hwmon->rg.pkg_rapl_limit,
> > +				       PKG_PWR_LIM_1_TIME,
> > +				       hwmon->scl_shift_time, SF_TIME);
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%llu\n", ullval);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +i915_power1_max_interval_store(struct device *dev,
> > +			       struct device_attribute *attr,
> > +			       const char *buf, size_t count)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +	long val;
> > +
> > +	ret = kstrtoul(buf, 0, &val);
> > +	if (ret)
> > +		return ret;
> > +
> > +	_field_scale_and_write(uncore, hwmon->rg.pkg_rapl_limit,
> > +			       PKG_PWR_LIM_2_TIME,
> > +			       hwmon->scl_shift_time, SF_TIME, val);
> > +
> > +	return count;
> > +}
> > +
> > +static ssize_t
> > +i915_power1_cap_enable_show(struct device *dev, struct device_attribute *attr,
> > +			    char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	intel_wakeref_t wakeref;
> > +	ssize_t ret;
> > +	u32 reg_value;
> > +	bool is_enabled;
> > +
> > +	with_intel_runtime_pm(uncore->rpm, wakeref)
> > +		reg_value = intel_uncore_read(uncore,
> > +					      hwmon->rg.pkg_rapl_limit_udw);
> > +
> > +	is_enabled = !!(reg_value & PKG_PWR_LIM_2_EN);
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", is_enabled);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +i915_power1_cap_enable_store(struct device *dev, struct device_attribute *attr,
> > +			     const char *buf, size_t count)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +	u32 val;
> > +	u32 bits_to_clear;
> > +	u32 bits_to_set;
> > +
> > +	ret = kstrtou32(buf, 0, &val);
> > +	if (ret)
> > +		return ret;
> > +
> > +	bits_to_clear = PKG_PWR_LIM_2_EN;
> > +	if (!val)
> > +		bits_to_set = 0;
> > +	else
> > +		bits_to_set = PKG_PWR_LIM_2_EN;
> > +
> > +	_locked_with_pm_intel_uncore_rmw(uncore, hwmon->rg.pkg_rapl_limit_udw,
> > +					 bits_to_clear, bits_to_set);
> > +
> > +	return count;
> > +}
> > +
> > +static ssize_t
> > +i915_power_default_limit_show(struct device *dev, struct device_attribute *attr,
> > +			      char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", hwmon->power_max_initial_value);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +i915_power_min_limit_show(struct device *dev, struct device_attribute *attr,
> > +			  char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +	u32 uval;
> > +
> > +	/*
> > +	 * This is a 64-bit register but the individual fields are under 32 bits
> > +	 * in size even after scaling.
> > +	 * The UAPI specifies a size of 32 bits.
> > +	 * The UAPI specifies that 0 should be returned if unsupported.
> > +	 * So, using u32 and %u is sufficient.
> > +	 */
> > +	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
> > +		uval = (u32)_field_read64_and_scale(uncore,
> > +						    hwmon->rg.pkg_power_sku,
> > +						    PKG_MIN_PWR,
> > +						    hwmon->scl_shift_power,
> > +						    SF_POWER);
> > +	else
> > +		uval = 0;
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", uval);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +i915_power_max_limit_show(struct device *dev, struct device_attribute *attr,
> > +			  char *buf)
> > +{
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	ssize_t ret;
> > +	u32 uval;
> > +
> > +	/*
> > +	 * This is a 64-bit register but the individual fields are under 32 bits
> > +	 * in size even after scaling.
> > +	 * The UAPI specifies a size of 32 bits.
> > +	 * The UAPI specifies that UINT_MAX should be returned if unsupported.
> > +	 * So, using u32 and %u is sufficient.
> > +	 */
> > +	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
> > +		uval = (u32)_field_read64_and_scale(uncore,
> > +						    hwmon->rg.pkg_power_sku,
> > +						    PKG_MAX_PWR,
> > +						    hwmon->scl_shift_power,
> > +						    SF_POWER);
> > +	else
> > +		uval = UINT_MAX;
> > +
> > +	ret = scnprintf(buf, PAGE_SIZE, "%u\n", uval);
> > +
> > +	return ret;
> > +}
> > +
> > +static SENSOR_DEVICE_ATTR(power1_max_enable, 0664,
> > +			  i915_power1_max_enable_show,
> > +			  i915_power1_max_enable_store, 0);
> > +static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> > +			  i915_power1_max_interval_show,
> > +			  i915_power1_max_interval_store, 0);
> > +static SENSOR_DEVICE_ATTR(power1_cap_enable, 0664,
> > +			  i915_power1_cap_enable_show,
> > +			  i915_power1_cap_enable_store, 0);
> > +static SENSOR_DEVICE_ATTR(power_default_limit, 0444,
> > +			  i915_power_default_limit_show, NULL, 0);
> > +static SENSOR_DEVICE_ATTR(power_min_limit, 0444,
> > +			  i915_power_min_limit_show, NULL, 0);
> > +static SENSOR_DEVICE_ATTR(power_max_limit, 0444,
> > +			  i915_power_max_limit_show, NULL, 0);
> > +static SENSOR_DEVICE_ATTR(energy1_input, 0444,
> > +			  i915_energy1_input_show, NULL, 0);
> > +
> > +static struct attribute *hwmon_attributes[] = {
> > +	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
> > +	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
> > +	&sensor_dev_attr_power1_cap_enable.dev_attr.attr,
> > +	&sensor_dev_attr_power_default_limit.dev_attr.attr,
> > +	&sensor_dev_attr_power_min_limit.dev_attr.attr,
> > +	&sensor_dev_attr_power_max_limit.dev_attr.attr,
> > +	&sensor_dev_attr_energy1_input.dev_attr.attr,
> > +	NULL
> > +};
> > +
> > +static umode_t hwmon_attributes_visible(struct kobject *kobj,
> > +					struct attribute *attr, int index)
> > +{
> > +	struct device *dev = kobj_to_dev(kobj);
> > +	struct drm_i915_private *i915 = dev_get_drvdata(dev);
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	i915_reg_t rgadr;
> > +
> > +	if (attr == &sensor_dev_attr_energy1_input.dev_attr.attr)
> > +		rgadr = hwmon->rg.reg_energy_status;
> > +	else if (attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
> > +		rgadr = hwmon->rg.pkg_rapl_limit;
> > +	else if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
> > +		rgadr = hwmon->rg.pkg_rapl_limit;
> > +	else if (attr == &sensor_dev_attr_power1_cap_enable.dev_attr.attr)
> > +		rgadr = hwmon->rg.pkg_rapl_limit_udw;
> > +	else if (attr == &sensor_dev_attr_power_default_limit.dev_attr.attr)
> > +		rgadr = hwmon->rg.pkg_rapl_limit;
> > +	else if (attr == &sensor_dev_attr_power_min_limit.dev_attr.attr)
> > +		return attr->mode;
> > +	else if (attr == &sensor_dev_attr_power_max_limit.dev_attr.attr)
> > +		return attr->mode;
> > +	else
> > +		return 0;
> > +
> > +	if (!i915_mmio_reg_valid(rgadr))
> > +		return 0;
> > +
> > +	return attr->mode;
> > +}
> > +
> > +static const struct attribute_group hwmon_attrgroup = {
> > +	.attrs = hwmon_attributes,
> > +	.is_visible = hwmon_attributes_visible,
> > +};
> > +
> > +static const struct attribute_group *hwmon_groups[] = {
> > +	&hwmon_attrgroup,
> > +	NULL
> > +};
> > +
> > +/*
> > + * HWMON SENSOR TYPE = hwmon_power
> > + *  - Sustained Power (power1_max)
> > + *  - Burst power     (power1_cap)
> > + *  - Peak power      (power1_crit)
> > + */
> > +static const u32 i915_config_power[] = {
> > +	HWMON_P_CAP | HWMON_P_MAX,
> > +	0
> > +};
> > +
> > +static const struct hwmon_channel_info i915_power = {
> > +	.type = hwmon_power,
> > +	.config = i915_config_power,
> > +};
> > +
> > +static const struct hwmon_channel_info *i915_info[] = {
> > +	&i915_power,
> > +	NULL
> > +};
> > +
> > +static umode_t
> > +i915_power_is_visible(const struct drm_i915_private *i915, u32 attr, int chan)
> > +{
> > +	i915_reg_t rgadr;
> > +
> > +	switch (attr) {
> > +	case hwmon_power_max:
> > +		rgadr = i915->hwmon.rg.pkg_rapl_limit;
> > +		break;
> > +	case hwmon_power_cap:
> > +		rgadr = i915->hwmon.rg.pkg_rapl_limit_udw;
> > +		break;
> > +	default:
> > +		return 0;
> > +	}
> > +
> > +	if (!i915_mmio_reg_valid(rgadr))
> > +		return 0;
> > +
> > +	return 0664;
> > +}
> > +
> > +static int
> > +i915_power_read(struct drm_i915_private *i915, u32 attr, int chan, long *val)
> > +{
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	int ret = 0;
> > +
> > +	switch (attr) {
> > +	case hwmon_power_max:
> > +		*val = (long)_field_read_and_scale(uncore,
> > +						   hwmon->rg.pkg_rapl_limit,
> > +						   PKG_PWR_LIM_1,
> > +						   hwmon->scl_shift_power,
> > +						   SF_POWER);
> > +		break;
> > +	case hwmon_power_cap:
> > +		*val = (long)_field_read_and_scale(uncore,
> > +						   hwmon->rg.pkg_rapl_limit_udw,
> > +						   PKG_PWR_LIM_2,
> > +						   hwmon->scl_shift_power,
> > +						   SF_POWER);
> > +		break;
> > +	default:
> > +		ret = -EOPNOTSUPP;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int
> > +i915_power_write(struct drm_i915_private *i915, u32 attr, int chan, long val)
> > +{
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	int ret = 0;
> > +
> > +	switch (attr) {
> > +	case hwmon_power_max:
> > +		_field_scale_and_write(uncore,
> > +				       hwmon->rg.pkg_rapl_limit,
> > +				       PKG_PWR_LIM_1,
> > +				       hwmon->scl_shift_power,
> > +				       SF_POWER, val);
> > +		break;
> > +	case hwmon_power_cap:
> > +		_field_scale_and_write(uncore,
> > +				       hwmon->rg.pkg_rapl_limit_udw,
> > +				       PKG_PWR_LIM_2,
> > +				       hwmon->scl_shift_power,
> > +				       SF_POWER, val);
> > +		break;
> > +	default:
> > +		ret = -EOPNOTSUPP;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static umode_t
> > +i915_is_visible(const void *data, enum hwmon_sensor_types type,
> > +		u32 attr, int channel)
> > +{
> > +	struct drm_i915_private *i915 = (struct drm_i915_private *)data;
> > +
> > +	switch (type) {
> > +	case hwmon_power:
> > +		return i915_power_is_visible(i915, attr, channel);
> > +	default:
> > +		return 0;
> > +	}
> > +}
> > +
> > +static int
> > +i915_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
> > +	  int channel, long *val)
> > +{
> > +	struct drm_i915_private *i915 = kdev_to_i915(dev);
> > +
> > +	switch (type) {
> > +	case hwmon_power:
> > +		return i915_power_read(i915, attr, channel, val);
> > +	default:
> > +		return -EOPNOTSUPP;
> > +	}
> > +}
> > +
> > +static int
> > +i915_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
> > +	   int channel, long val)
> > +{
> > +	struct drm_i915_private *i915 = kdev_to_i915(dev);
> > +
> > +	switch (type) {
> > +	case hwmon_power:
> > +		return i915_power_write(i915, attr, channel, val);
> > +	default:
> > +		return -EOPNOTSUPP;
> > +	}
> > +}
> > +
> > +static const struct hwmon_ops i915_hwmon_ops = {
> > +	.is_visible = i915_is_visible,
> > +	.read = i915_read,
> > +	.write = i915_write,
> > +};
> > +
> > +static const struct hwmon_chip_info i915_chip_info = {
> > +	.ops = &i915_hwmon_ops,
> > +	.info = i915_info,
> > +};
> > +
> > +static void
> > +i915_hwmon_get_preregistration_info(struct drm_i915_private *i915)
> > +{
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	struct intel_uncore *uncore = &i915->uncore;
> > +	intel_wakeref_t wakeref;
> > +	u32 val_sku_unit;
> > +	__le32 le_sku_unit;
> > +
> > +	if (IS_DG1(i915)) {
> > +		hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
> > +		hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
> > +		hwmon->rg.pkg_energy_status = PCU_PACKAGE_ENERGY_STATUS;
> > +		hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
> > +		hwmon->rg.pkg_rapl_limit_udw = PCU_PACKAGE_RAPL_LIMIT_UDW;
> > +		hwmon->rg.plt_energy_status = PCU_PLATFORM_ENERGY_STATUS;
> > +	} else {
> > +		hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
> > +		hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
> > +		hwmon->rg.pkg_energy_status = INVALID_MMIO_REG;
> > +		hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
> > +		hwmon->rg.pkg_rapl_limit_udw = INVALID_MMIO_REG;
> > +		hwmon->rg.plt_energy_status = INVALID_MMIO_REG;
> > +	}
> > +
> > +	/*
> > +	 * If a platform does not support *_PLATFORM_ENERGY_STATUS,
> > +	 * try *PACKAGE_ENERGY_STATUS.
> > +	 */
> > +	if (i915_mmio_reg_valid(hwmon->rg.plt_energy_status))
> > +		hwmon->rg.reg_energy_status = hwmon->rg.plt_energy_status;
> > +	else
> > +		hwmon->rg.reg_energy_status = hwmon->rg.pkg_energy_status;
> > +
> > +	wakeref = intel_runtime_pm_get(uncore->rpm);
> > +
> > +	/*
> > +	 * The contents of register hwmon->rg.pkg_power_sku_unit do not change,
> > +	 * so read it once and store the shift values.
> > +	 */
> > +	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
> > +		val_sku_unit = intel_uncore_read(uncore,
> > +						 hwmon->rg.pkg_power_sku_unit);
> > +	else
> > +		val_sku_unit = 0;
> > +
> > +	hwmon->energy_counter_overflow = 0;
> > +
> > +	if (i915_mmio_reg_valid(hwmon->rg.reg_energy_status))
> > +		hwmon->energy_counter_prev =
> > +			intel_uncore_read(uncore, hwmon->rg.reg_energy_status);
> > +	else
> > +		hwmon->energy_counter_prev = 0;
> > +
> > +	intel_runtime_pm_put(uncore->rpm, wakeref);
> > +
> > +	le_sku_unit = cpu_to_le32(val_sku_unit);
> > +	hwmon->scl_shift_power = le32_get_bits(le_sku_unit, PKG_PWR_UNIT);
> > +	hwmon->scl_shift_energy = le32_get_bits(le_sku_unit, PKG_ENERGY_UNIT);
> > +	hwmon->scl_shift_time = le32_get_bits(le_sku_unit, PKG_TIME_UNIT);
> > +
> > +	/*
> > +	 * There is no direct way to obtain the power default_limit.
> > +	 * The best known workaround is to use the initial value of power1_max.
> > +	 *
> > +	 * The value of power1_max is reset to the default on reboot, but is
> > +	 * not reset by a module unload/load sequence.  To allow proper
> > +	 * functioning after a module reload, the value for power1_max is
> > +	 * restored to its original value at module unload time in
> > +	 * i915_hwmon_fini().
> > +	 */
> > +	hwmon->power_max_initial_value =
> > +		(u32)_field_read_and_scale(uncore,
> > +					   hwmon->rg.pkg_rapl_limit,
> > +					   PKG_PWR_LIM_1,
> > +					   hwmon->scl_shift_power, SF_POWER);
> > +}
> > +
> > +int i915_hwmon_init(struct drm_device *drm_dev)
> > +{
> > +	struct drm_i915_private *i915 = to_i915(drm_dev);
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +	struct device *hwmon_dev;
> > +
> > +	mutex_init(&hwmon->hwmon_lock);
> > +
> > +	i915_hwmon_get_preregistration_info(i915);
> > +
> > +	hwmon_dev = hwmon_device_register_with_info(drm_dev->dev, "i915",
> > +						    drm_dev,
> > +						    &i915_chip_info,
> > +						    hwmon_groups);
> > +
> > +	if (IS_ERR(hwmon_dev)) {
> > +		mutex_destroy(&hwmon->hwmon_lock);
> > +		return PTR_ERR(hwmon_dev);
> > +	}
> > +
> > +	hwmon->dev = hwmon_dev;
> > +
> > +	return 0;
> > +}
> > +
> > +void i915_hwmon_fini(struct drm_device *drm_dev)
> > +{
> > +	struct drm_i915_private *i915 = to_i915(drm_dev);
> > +	struct i915_hwmon *hwmon = &i915->hwmon;
> > +
> > +	if (hwmon->power_max_initial_value) {
> > +		/* Restore power1_max. */
> > +		_field_scale_and_write(&i915->uncore, hwmon->rg.pkg_rapl_limit,
> > +				       PKG_PWR_LIM_1, hwmon->scl_shift_power,
> > +				       SF_POWER,
> > +				       hwmon->power_max_initial_value);
> > +	}
> > +
> > +	if (hwmon->dev)
> > +		hwmon_device_unregister(hwmon->dev);
> > +
> > +	mutex_destroy(&hwmon->hwmon_lock);
> > +
> > +	memset(hwmon, 0, sizeof(*hwmon));
> > +}
> > diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
> > new file mode 100644
> > index 0000000000000..0be919f0a463d
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/i915_hwmon.h
> > @@ -0,0 +1,41 @@
> > +/* SPDX-License-Identifier: MIT */
> > +
> > +/*
> > + * Copyright © 2020 Intel Corporation
> > + */
> > +
> > +#ifndef __INTEL_HWMON_H__
> > +#define __INTEL_HWMON_H__
> > +
> > +#include <drm/drm_device.h>
> 
> A forward declaration should be enough. Need <linux/types.h> though.

Done.

> 
> > +#include "i915_reg.h"
> > +
> > +struct i915_hwmon_reg {
> > +	i915_reg_t pkg_power_sku_unit;
> > +	i915_reg_t pkg_power_sku;
> > +	i915_reg_t pkg_energy_status;
> > +	i915_reg_t pkg_rapl_limit;
> > +	i915_reg_t pkg_rapl_limit_udw;
> > +	i915_reg_t plt_energy_status;
> > +	i915_reg_t reg_energy_status;
> > +};
> > +
> > +struct i915_hwmon {
> > +	struct device *dev;
> > +	struct mutex hwmon_lock;	/* counter overflow logic and rmw */
> > +
> > +	struct i915_hwmon_reg rg;
> > +
> > +	u32 energy_counter_overflow;
> > +	u32 energy_counter_prev;
> > +	u32 power_max_initial_value;
> > +
> > +	int scl_shift_power;
> > +	int scl_shift_energy;
> > +	int scl_shift_time;
> > +};
> > +
> > +int i915_hwmon_init(struct drm_device *drm_dev);
> > +void i915_hwmon_fini(struct drm_device *drm_dev);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index f80d656331f42..62fccf71ddad6 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -4071,6 +4071,59 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
> >  #define BXT_RP_STATE_CAP        _MMIO(0x138170)
> >  #define GEN9_RP_STATE_LIMITS	_MMIO(0x138148)
> >  
> > +/* DG1 */
> > +
> > +/* based on MCHBAR_MIRROR_BASE_SNB == 0x140000 */
> > +#define PCU_PACKAGE_POWER_SKU_UNIT	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5938)
> > +#define PCU_PACKAGE_ENERGY_STATUS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x593c)
> > +#define PCU_PACKAGE_RAPL_LIMIT		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
> > +#define PCU_PACKAGE_RAPL_LIMIT_UDW	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a4)
> > +#define PCU_PACKAGE_POWER_SKU		INVALID_MMIO_REG
> > +#define PCU_PLATFORM_ENERGY_STATUS	INVALID_MMIO_REG
> > +
> > +/* Fields for *_PACKAGE_RAPL_LIMIT: */
> > +#define   PKG_PWR_LIM_1			REG_GENMASK(14, 0)
> > +#define   PKG_PWR_LIM_1_EN		REG_BIT(15)
> > +#define   PKG_PWR_LIM_1_TIME		REG_GENMASK(23, 17)
> > +
> > +/*
> > + * Fields for *_PACKAGE_RAPL_LIMIT_UDW:
> > + * In docs, these fields may be defined relative to the entire 64-bit
> > + * register, but here they are defined relative to the 32-bit boundary.
> > + */
> > +#define   PKG_PWR_LIM_2			REG_GENMASK(14, 0)	// 46:32
> > +#define   PKG_PWR_LIM_2_EN		REG_BIT(15)		// 47:47
> > +#define   PKG_PWR_LIM_2_TIME		REG_GENMASK(23, 17)	// 55:49
> > +
> > +/*
> > + * *_PACKAGE_POWER_SKU_UNIT - fields specifying scaling for PCU quantities.
> > + * - PKG_PWR_UNIT - Power Units used for power control registers. The
> > + *   actual unit value is calculated by 1 W / Power(2,PKG_PWR_UNIT).
> > + * - PKG_ENERGY_UNIT - Energy Units used for power control registers. The
> > + *   actual unit value is calculated by 1 J / Power(2,PKG_ENERGY_UNIT).
> > + * - PKG_TIME_UNIT - Time Units used for power control registers. The
> > + *   actual unit value is calculated by 1 s / Power(2,PKG_TIME_UNIT).
> > + */
> > +#define   PKG_PWR_UNIT			REG_GENMASK(3, 0)
> > +#define   PKG_ENERGY_UNIT		REG_GENMASK(12, 8)
> > +#define   PKG_TIME_UNIT			REG_GENMASK(19, 16)
> > +
> > +/*
> > + * *_PACKAGE_POWER_SKU - SKU power and timing parameters.
> > + * Used herein as a 64-bit bit register.
> > + * These masks are defined using GENMASK_ULL as REG_GENMASK is limited to u32
> > + * and as GENMASK is "long" and therefore 32-bits on a 32-bit system.
> > + * PKG_PKG_TDP, PKG_MIN_PWR, and PKG_MAX_PWR are scaled in the same way as
> > + * PKG_PWR_LIM_*, above.
> > + * PKG_MAX_WIN has sub-fields for x and y, and has the value: is 1.x * 2^y.
> > + */
> > +#define   PKG_PKG_TDP			GENMASK_ULL(14, 0)
> > +#define   PKG_MIN_PWR			GENMASK_ULL(30, 16)
> > +#define   PKG_MAX_PWR			GENMASK_ULL(46, 32)
> > +#define   PKG_MAX_WIN			GENMASK_ULL(54, 48)
> > +#define     PKG_MAX_WIN_Y		GENMASK_ULL(54, 53)
> > +#define     PKG_MAX_WIN_X		GENMASK_ULL(52, 48)
> > +
> >  /*
> >   * Logical Context regs
> >   */
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center

V3 of this patch will arrive on this mailing list shortly.

Thanks for your comments.

-Dale
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 1e1cb245fca77..ec8d5a0d7ea96 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -14,6 +14,7 @@  config DRM_I915
 	select DRM_MIPI_DSI
 	select RELAY
 	select IRQ_WORK
+	select HWMON
 	# i915 depends on ACPI_VIDEO when ACPI is enabled
 	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
 	select BACKLIGHT_CLASS_DEVICE if ACPI
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d0d936d9137bc..e213e2b129e20 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -37,6 +37,7 @@  i915-y += i915_drv.o \
 	  i915_config.o \
 	  i915_irq.o \
 	  i915_getparam.o \
+	  i915_hwmon.o \
 	  i915_mitigations.o \
 	  i915_params.o \
 	  i915_pci.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 305557e1942aa..84c7de3b34c7d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -69,6 +69,7 @@ 
 
 #include "i915_debugfs.h"
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 #include "i915_ioc32.h"
 #include "i915_irq.h"
 #include "i915_memcpy.h"
@@ -675,6 +676,10 @@  static void i915_driver_register(struct drm_i915_private *dev_priv)
 	i915_debugfs_register(dev_priv);
 	i915_setup_sysfs(dev_priv);
 
+	/* Register with hwmon */
+	if (i915_hwmon_init(&dev_priv->drm))
+		drm_err(&dev_priv->drm, "Failed to register driver hwmon!\n");
+
 	/* Depends on sysfs having been initialized */
 	i915_perf_register(dev_priv);
 
@@ -709,9 +714,13 @@  static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 	intel_gt_driver_unregister(&dev_priv->gt);
 
 	i915_perf_unregister(dev_priv);
+
+	i915_hwmon_fini(&dev_priv->drm);
+
 	i915_pmu_unregister(dev_priv);
 
 	i915_teardown_sysfs(dev_priv);
+
 	drm_dev_unplug(&dev_priv->drm);
 
 	i915_gem_driver_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 69e43bf91a153..7e9b452c77e2b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -61,6 +61,7 @@ 
 #include <drm/drm_connector.h>
 #include <drm/i915_mei_hdcp_interface.h>
 
+#include "i915_hwmon.h"
 #include "i915_params.h"
 #include "i915_reg.h"
 #include "i915_utils.h"
@@ -1109,6 +1110,8 @@  struct drm_i915_private {
 
 	struct i915_perf perf;
 
+	struct i915_hwmon hwmon;
+
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct intel_gt gt;
 
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
new file mode 100644
index 0000000000000..ab8f32f7ed1de
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -0,0 +1,788 @@ 
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Power-related hwmon entries.
+ */
+
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/types.h>
+
+#include "i915_drv.h"
+#include "gt/intel_gt.h"
+#include "i915_hwmon.h"
+
+/*
+ * SF_* - scale factors for particular quantities.
+ * The hwmon standard says that quantities of the given types are specified
+ * in the given units:
+ * - time   - milliseconds
+ * - power  - microwatts
+ * - energy - microjoules
+ *
+ * The helpers in this file multiply a raw register field by one of these
+ * factors and then shift right by the matching unit exponent taken from
+ * *_PACKAGE_POWER_SKU_UNIT (and do the reverse when writing).
+ */
+
+#define SF_TIME		   1000
+#define SF_POWER	1000000
+#define SF_ENERGY	1000000
+
+/*
+ * _locked_with_pm_intel_uncore_rmw()
+ * Run intel_uncore_rmw() on @reg with the given @clear and @set masks while
+ * holding hwmon_lock and a runtime-PM wakeref.
+ */
+static void
+_locked_with_pm_intel_uncore_rmw(struct intel_uncore *uncore,
+				 i915_reg_t reg, u32 clear, u32 set)
+{
+	struct i915_hwmon *hwmon = &uncore->i915->hwmon;
+	intel_wakeref_t wakeref;
+
+	mutex_lock(&hwmon->hwmon_lock);
+	with_intel_runtime_pm(uncore->rpm, wakeref) {
+		intel_uncore_rmw(uncore, reg, clear, set);
+	}
+	mutex_unlock(&hwmon->hwmon_lock);
+}
+
+/*
+ * _field_read_and_scale()
+ * Read the 32-bit register @rgadr, extract the field selected by @field_msk,
+ * multiply it by @scale_factor and shift the product right by @nshift bits,
+ * rounding to nearest.
+ * The return type is u64 because the scaled value may exceed 32 bits.
+ */
+static __always_inline u64
+_field_read_and_scale(struct intel_uncore *uncore, i915_reg_t rgadr,
+		      u32 field_msk, int nshift, unsigned int scale_factor)
+{
+	intel_wakeref_t wakeref;
+	u32 raw;
+	u32 field;
+	u64 product;
+
+	with_intel_runtime_pm(uncore->rpm, wakeref)
+		raw = intel_uncore_read(uncore, rgadr);
+
+	field = le32_get_bits(cpu_to_le32(raw), field_msk);
+	product = mul_u32_u32(scale_factor, field);
+
+	/* Nothing to shift: the product is already the result. */
+	if (nshift <= 0)
+		return product;
+
+	/* Adding half of the divisor first makes the shift round to nearest. */
+	return (product + (1 << (nshift - 1))) >> nshift;
+}
+
+/*
+ * _field_read64_and_scale()
+ * 64-bit counterpart of _field_read_and_scale(): read the 64-bit register
+ * @rgadr, extract the field selected by @field_msk, multiply it by
+ * @scale_factor and shift the product right by @nshift bits, rounding to
+ * nearest.
+ */
+static __always_inline u64
+_field_read64_and_scale(struct intel_uncore *uncore, i915_reg_t rgadr,
+			u64 field_msk, int nshift, unsigned int scale_factor)
+{
+	intel_wakeref_t wakeref;
+	u64 raw;
+	u64 field;
+	u64 product;
+
+	with_intel_runtime_pm(uncore->rpm, wakeref)
+		raw = intel_uncore_read64(uncore, rgadr);
+
+	field = le64_get_bits(cpu_to_le64(raw), field_msk);
+	product = scale_factor * field;
+
+	/* Nothing to shift: the product is already the result. */
+	if (nshift <= 0)
+		return product;
+
+	/* Adding half of the divisor first makes the shift round to nearest. */
+	return (product + (1 << (nshift - 1))) >> nshift;
+}
+
+/*
+ * _field_scale_and_write()
+ * Convert @lval from hwmon units to register units (multiply by 2^@nshift,
+ * divide by @scale_factor, round to nearest) and store the result in the
+ * field @field_msk of register @rgadr using a locked read-modify-write.
+ *
+ * Out-of-range requests are clamped instead of being truncated: a negative
+ * @lval is treated as 0 and a value too large for the field is written as
+ * the field maximum.  Without the clamp the excess high bits would be masked
+ * off and an unrelated, possibly much smaller, value would be programmed.
+ */
+static __always_inline void
+_field_scale_and_write(struct intel_uncore *uncore,
+		       i915_reg_t rgadr,
+		       u32 field_msk, int nshift,
+		       unsigned int scale_factor, long lval)
+{
+	/* Largest value the field can hold, right-justified. */
+	const u32 field_max = u32_get_bits(~0U, field_msk);
+	/*
+	 * Smallest input that already scales to at least field_max.  Capping
+	 * the input here also guarantees that the shift and the rounding
+	 * addition below cannot overflow 64 bits.
+	 */
+	const u64 lval_cap = (mul_u32_u32(field_max, scale_factor) +
+			      (1ULL << nshift) - 1) >> nshift;
+	u64 uval = lval < 0 ? 0 : (u64)lval;
+	u64 nval;
+	u32 bits_to_clear;
+	u32 bits_to_set;
+
+	if (uval > lval_cap)
+		uval = lval_cap;
+
+	/* Computation in 64-bits to avoid overflow. Round to nearest. */
+	nval = DIV_ROUND_CLOSEST_ULL(uval << nshift, scale_factor);
+	if (nval > field_max)
+		nval = field_max;
+
+	bits_to_clear = field_msk;
+	bits_to_set = le32_to_cpu(le32_encode_bits(nval, field_msk));
+
+	_locked_with_pm_intel_uncore_rmw(uncore, rgadr,
+					 bits_to_clear, bits_to_set);
+}
+
+/*
+ * i915_energy1_input_show - A custom function to obtain energy1_input.
+ * Use a custom function instead of the usual hwmon helpers in order to
+ * guarantee 64-bits of result to user-space.
+ * Units are microjoules.
+ *
+ * The underlying hardware register is 32-bits and is subject to overflow.
+ * This function compensates for overflow of the 32-bit register by detecting
+ * wrap-around and incrementing an overflow counter.
+ * This only works if the register is sampled often enough to avoid
+ * missing an instance of overflow - achieved either by repeated
+ * queries through the API, or via a possible timer (future - TBD) that
+ * ensures values are read often enough to catch all overflows.
+ *
+ * How long before overflow?  For example, with an example scaling bit
+ * shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and a power draw of
+ * 1000 watts, the 32-bit counter will overflow in approximately 4.36 minutes.
+ *
+ * Examples:
+ *    1 watt:  (2^32 >> 14) /    1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60             secs/min -> 4.36 minutes
+ *
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_energy1_input_show(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device that
+	 * i915_hwmon_init() passed at registration, so convert it explicitly
+	 * instead of relying on the layout of struct drm_i915_private.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	int nshift = hwmon->scl_shift_energy;
+	ssize_t ret;
+	intel_wakeref_t wakeref;
+	u32 reg_value;
+	u64 vlo;
+	u64 vhi;
+
+	/* The lock covers the register sample and the wrap bookkeeping. */
+	mutex_lock(&hwmon->hwmon_lock);
+
+	with_intel_runtime_pm(uncore->rpm, wakeref)
+		reg_value = intel_uncore_read(uncore,
+					      hwmon->rg.reg_energy_status);
+
+	/*
+	 * The u32 register concatenated with the u32 overflow counter
+	 * gives an effective energy counter size of 64-bits.  However, the
+	 * computations below are done modulo 2^96 to avoid overflow during
+	 * scaling in the conversion to microjoules.
+	 *
+	 * The low-order 64-bits of the resulting quantity are returned to
+	 * the caller in units of microjoules, encoded into a decimal string.
+	 *
+	 * For a power of 1000 watts, 64 bits in units of microjoules will
+	 * overflow after 584 years.
+	 */
+
+	/* A sample smaller than the previous one means the counter wrapped. */
+	if (hwmon->energy_counter_prev > reg_value)
+		hwmon->energy_counter_overflow++;
+
+	hwmon->energy_counter_prev = reg_value;
+
+	/*
+	 * 64-bit variables vlo and vhi are used for the scaling process.
+	 * The 96-bit counter value is composed from the two 64-bit variables
+	 * vhi and vlo thusly:  counter == vhi << 32 + vlo .
+	 * The 32-bits of overlap between the two variables is convenient for
+	 * handling overflows out of vlo.
+	 */
+
+	vlo = reg_value;
+	vhi = hwmon->energy_counter_overflow;
+
+	mutex_unlock(&hwmon->hwmon_lock);
+
+	vlo = SF_ENERGY * vlo;
+
+	/* Prepare to round to nearest */
+	if (nshift > 0)
+		vlo += 1 << (nshift - 1);
+
+	/*
+	 * Anything in the upper-32 bits of vlo gets added into vhi here,
+	 * and then cleared from vlo.
+	 */
+	vhi = (SF_ENERGY * vhi) + (vlo >> 32);
+	vlo &= 0xffffffffULL;
+
+	/*
+	 * Apply the right shift.
+	 * - vlo shifted by itself.
+	 * - vlo receiving what's shifted out of vhi.
+	 * - vhi shifted by itself
+	 */
+	vlo = vlo >> nshift;
+	vlo |= (vhi << (32 - nshift)) & 0xffffffffULL;
+	vhi = vhi >> nshift;
+
+	/* Combined to get a 64-bit result in vlo. */
+	vlo |= (vhi << 32);
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu\n", vlo);
+
+	return ret;
+}
+
+/*
+ * i915_power1_max_enable_show - sysfs show for power1_max_enable.
+ * Prints "1" if PKG_PWR_LIM_1_EN is set in the pkg_rapl_limit register,
+ * otherwise "0".  Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_power1_max_enable_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	intel_wakeref_t wakeref;
+	ssize_t ret;
+	u32 reg_value;
+	bool is_enabled;
+
+	with_intel_runtime_pm(uncore->rpm, wakeref)
+		reg_value = intel_uncore_read(uncore,
+					      i915->hwmon.rg.pkg_rapl_limit);
+
+	is_enabled = !!(reg_value & PKG_PWR_LIM_1_EN);
+
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", is_enabled);
+
+	return ret;
+}
+
+/*
+ * i915_power1_max_enable_store - sysfs store for power1_max_enable.
+ * Parses an unsigned integer from @buf; zero clears PKG_PWR_LIM_1_EN in the
+ * pkg_rapl_limit register and any other value sets it.
+ * Returns @count on success or the kstrtou32() error code.
+ */
+static ssize_t
+i915_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+	u32 val;
+	u32 bits_to_clear;
+	u32 bits_to_set;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	bits_to_clear = PKG_PWR_LIM_1_EN;
+	if (!val)
+		bits_to_set = 0;
+	else
+		bits_to_set = PKG_PWR_LIM_1_EN;
+
+	_locked_with_pm_intel_uncore_rmw(uncore, hwmon->rg.pkg_rapl_limit,
+					 bits_to_clear, bits_to_set);
+
+	return count;
+}
+
+/*
+ * i915_power1_max_interval_show - sysfs show for power1_max_interval.
+ * Reads the PKG_PWR_LIM_1_TIME field of the pkg_rapl_limit register, scales
+ * it with SF_TIME and the cached time-unit shift, and prints the result.
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+	u64 ullval;
+
+	ullval = _field_read_and_scale(uncore, hwmon->rg.pkg_rapl_limit,
+				       PKG_PWR_LIM_1_TIME,
+				       hwmon->scl_shift_time, SF_TIME);
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu\n", ullval);
+
+	return ret;
+}
+
+/*
+ * i915_power1_max_interval_store - sysfs store for power1_max_interval.
+ * Parses an unsigned integer from @buf, converts it with SF_TIME and the
+ * cached time-unit shift, and writes it to the PKG_PWR_LIM_1_TIME field of
+ * the pkg_rapl_limit register.
+ * Returns @count on success or the kstrtoul() error code.
+ *
+ * NOTE(review): the field is written as a plain linear value, mirroring the
+ * show side; confirm against the register specification that the time-window
+ * field is not a split mantissa/exponent encoding like PKG_MAX_WIN.
+ */
+static ssize_t
+i915_power1_max_interval_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+	unsigned long val;	/* kstrtoul() requires an unsigned long */
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	/* The write helper takes a long: saturate rather than go negative. */
+	if (val > LONG_MAX)
+		val = LONG_MAX;
+
+	/* This is the limit-1 register, so use the limit-1 time field. */
+	_field_scale_and_write(uncore, hwmon->rg.pkg_rapl_limit,
+			       PKG_PWR_LIM_1_TIME,
+			       hwmon->scl_shift_time, SF_TIME, val);
+
+	return count;
+}
+
+/*
+ * i915_power1_cap_enable_show - sysfs show for power1_cap_enable.
+ * Prints "1" if PKG_PWR_LIM_2_EN is set in the pkg_rapl_limit_udw register,
+ * otherwise "0".  Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_power1_cap_enable_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	intel_wakeref_t wakeref;
+	ssize_t ret;
+	u32 reg_value;
+	bool is_enabled;
+
+	with_intel_runtime_pm(uncore->rpm, wakeref)
+		reg_value = intel_uncore_read(uncore,
+					      hwmon->rg.pkg_rapl_limit_udw);
+
+	is_enabled = !!(reg_value & PKG_PWR_LIM_2_EN);
+
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", is_enabled);
+
+	return ret;
+}
+
+/*
+ * i915_power1_cap_enable_store - sysfs store for power1_cap_enable.
+ * Parses an unsigned integer from @buf; zero clears PKG_PWR_LIM_2_EN in the
+ * pkg_rapl_limit_udw register and any other value sets it.
+ * Returns @count on success or the kstrtou32() error code.
+ */
+static ssize_t
+i915_power1_cap_enable_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+	u32 val;
+	u32 bits_to_clear;
+	u32 bits_to_set;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	bits_to_clear = PKG_PWR_LIM_2_EN;
+	if (!val)
+		bits_to_set = 0;
+	else
+		bits_to_set = PKG_PWR_LIM_2_EN;
+
+	_locked_with_pm_intel_uncore_rmw(uncore, hwmon->rg.pkg_rapl_limit_udw,
+					 bits_to_clear, bits_to_set);
+
+	return count;
+}
+
+/*
+ * i915_power_default_limit_show - sysfs show for power_default_limit.
+ * Prints the power1_max value that was captured when the hwmon state was
+ * initialised.  Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_power_default_limit_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", hwmon->power_max_initial_value);
+
+	return ret;
+}
+
+/*
+ * i915_power_min_limit_show - sysfs show for power_min_limit.
+ * Prints the scaled PKG_MIN_PWR field of the pkg_power_sku register, or 0 if
+ * the platform has no such register.
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_power_min_limit_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+	u32 uval;
+
+	/*
+	 * This is a 64-bit register but the individual fields are under 32 bits
+	 * in size even after scaling.
+	 * The UAPI specifies a size of 32 bits.
+	 * The UAPI specifies that 0 should be returned if unsupported.
+	 * So, using u32 and %u is sufficient.
+	 */
+	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
+		uval = (u32)_field_read64_and_scale(uncore,
+						    hwmon->rg.pkg_power_sku,
+						    PKG_MIN_PWR,
+						    hwmon->scl_shift_power,
+						    SF_POWER);
+	else
+		uval = 0;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", uval);
+
+	return ret;
+}
+
+/*
+ * i915_power_max_limit_show - sysfs show for power_max_limit.
+ * Prints the scaled PKG_MAX_PWR field of the pkg_power_sku register, or
+ * UINT_MAX if the platform has no such register.
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t
+i915_power_max_limit_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct intel_uncore *uncore = &i915->uncore;
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	ssize_t ret;
+	u32 uval;
+
+	/*
+	 * This is a 64-bit register but the individual fields are under 32 bits
+	 * in size even after scaling.
+	 * The UAPI specifies a size of 32 bits.
+	 * The UAPI specifies that UINT_MAX should be returned if unsupported.
+	 * So, using u32 and %u is sufficient.
+	 */
+	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
+		uval = (u32)_field_read64_and_scale(uncore,
+						    hwmon->rg.pkg_power_sku,
+						    PKG_MAX_PWR,
+						    hwmon->scl_shift_power,
+						    SF_POWER);
+	else
+		uval = UINT_MAX;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%u\n", uval);
+
+	return ret;
+}
+
+/*
+ * Extra sysfs attributes that are not described by i915_chip_info.
+ * The enable/interval attributes are read-write (0664); the limit and energy
+ * attributes are read-only (0444).
+ */
+static SENSOR_DEVICE_ATTR(power1_max_enable, 0664,
+			  i915_power1_max_enable_show,
+			  i915_power1_max_enable_store, 0);
+static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
+			  i915_power1_max_interval_show,
+			  i915_power1_max_interval_store, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_enable, 0664,
+			  i915_power1_cap_enable_show,
+			  i915_power1_cap_enable_store, 0);
+static SENSOR_DEVICE_ATTR(power_default_limit, 0444,
+			  i915_power_default_limit_show, NULL, 0);
+static SENSOR_DEVICE_ATTR(power_min_limit, 0444,
+			  i915_power_min_limit_show, NULL, 0);
+static SENSOR_DEVICE_ATTR(power_max_limit, 0444,
+			  i915_power_max_limit_show, NULL, 0);
+static SENSOR_DEVICE_ATTR(energy1_input, 0444,
+			  i915_energy1_input_show, NULL, 0);
+
+/* NULL-terminated list of the attributes above, filtered by .is_visible. */
+static struct attribute *hwmon_attributes[] = {
+	&sensor_dev_attr_power1_max_enable.dev_attr.attr,
+	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+	&sensor_dev_attr_power1_cap_enable.dev_attr.attr,
+	&sensor_dev_attr_power_default_limit.dev_attr.attr,
+	&sensor_dev_attr_power_min_limit.dev_attr.attr,
+	&sensor_dev_attr_power_max_limit.dev_attr.attr,
+	&sensor_dev_attr_energy1_input.dev_attr.attr,
+	NULL
+};
+
+/*
+ * hwmon_attributes_visible - .is_visible callback for hwmon_attrgroup.
+ * An attribute backed by a register is shown with its declared mode only if
+ * that register is valid on this platform, and hidden (0) otherwise.
+ * power_min_limit and power_max_limit are always shown; their show functions
+ * substitute a fallback value when the backing register is missing.
+ * Attributes that are not recognised are hidden.
+ */
+static umode_t hwmon_attributes_visible(struct kobject *kobj,
+					struct attribute *attr, int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	/*
+	 * The drvdata of the hwmon device is the struct drm_device passed at
+	 * registration; convert it explicitly rather than by pointer aliasing.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	i915_reg_t rgadr;
+
+	if (attr == &sensor_dev_attr_energy1_input.dev_attr.attr)
+		rgadr = hwmon->rg.reg_energy_status;
+	else if (attr == &sensor_dev_attr_power1_max_enable.dev_attr.attr)
+		rgadr = hwmon->rg.pkg_rapl_limit;
+	else if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+		rgadr = hwmon->rg.pkg_rapl_limit;
+	else if (attr == &sensor_dev_attr_power1_cap_enable.dev_attr.attr)
+		rgadr = hwmon->rg.pkg_rapl_limit_udw;
+	else if (attr == &sensor_dev_attr_power_default_limit.dev_attr.attr)
+		rgadr = hwmon->rg.pkg_rapl_limit;
+	else if (attr == &sensor_dev_attr_power_min_limit.dev_attr.attr)
+		return attr->mode;
+	else if (attr == &sensor_dev_attr_power_max_limit.dev_attr.attr)
+		return attr->mode;
+	else
+		return 0;
+
+	if (!i915_mmio_reg_valid(rgadr))
+		return 0;
+
+	return attr->mode;
+}
+
+/* Attribute group holding the extra attributes and their visibility filter. */
+static const struct attribute_group hwmon_attrgroup = {
+	.attrs = hwmon_attributes,
+	.is_visible = hwmon_attributes_visible,
+};
+
+/* NULL-terminated group list passed to hwmon_device_register_with_info(). */
+static const struct attribute_group *hwmon_groups[] = {
+	&hwmon_attrgroup,
+	NULL
+};
+
+/*
+ * HWMON SENSOR TYPE = hwmon_power
+ *  - Sustained Power (power1_max)
+ *  - Burst power     (power1_cap)
+ *
+ * Only HWMON_P_MAX and HWMON_P_CAP are configured for the single power
+ * channel below; peak power (power1_crit) is not exposed.
+ */
+static const u32 i915_config_power[] = {
+	HWMON_P_CAP | HWMON_P_MAX,
+	0
+};
+
+/* Channel description for the hwmon_power sensor type. */
+static const struct hwmon_channel_info i915_power = {
+	.type = hwmon_power,
+	.config = i915_config_power,
+};
+
+/* NULL-terminated list of all channel descriptions of this chip. */
+static const struct hwmon_channel_info *i915_info[] = {
+	&i915_power,
+	NULL
+};
+
+/*
+ * i915_power_is_visible - mode of a standard hwmon power attribute.
+ * power1_max and power1_cap get mode 0664 when their backing register
+ * (pkg_rapl_limit resp. pkg_rapl_limit_udw) is valid on this platform;
+ * everything else is hidden (0).
+ */
+static umode_t
+i915_power_is_visible(const struct drm_i915_private *i915, u32 attr, int chan)
+{
+	const struct i915_hwmon_reg *regs = &i915->hwmon.rg;
+
+	if (attr == hwmon_power_max)
+		return i915_mmio_reg_valid(regs->pkg_rapl_limit) ? 0664 : 0;
+
+	if (attr == hwmon_power_cap)
+		return i915_mmio_reg_valid(regs->pkg_rapl_limit_udw) ? 0664 : 0;
+
+	return 0;
+}
+
+/*
+ * i915_power_read - read a standard hwmon power attribute.
+ * Stores the scaled PKG_PWR_LIM_1 (power1_max) or PKG_PWR_LIM_2 (power1_cap)
+ * field in @val.  Returns 0 on success or -EOPNOTSUPP for any other @attr.
+ */
+static int
+i915_power_read(struct drm_i915_private *i915, u32 attr, int chan, long *val)
+{
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	struct intel_uncore *uncore = &i915->uncore;
+
+	switch (attr) {
+	case hwmon_power_max:
+		*val = (long)_field_read_and_scale(uncore,
+						   hwmon->rg.pkg_rapl_limit,
+						   PKG_PWR_LIM_1,
+						   hwmon->scl_shift_power,
+						   SF_POWER);
+		return 0;
+	case hwmon_power_cap:
+		*val = (long)_field_read_and_scale(uncore,
+						   hwmon->rg.pkg_rapl_limit_udw,
+						   PKG_PWR_LIM_2,
+						   hwmon->scl_shift_power,
+						   SF_POWER);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * i915_power_write - write a standard hwmon power attribute.
+ * Scales @val and writes it to PKG_PWR_LIM_1 (power1_max) or PKG_PWR_LIM_2
+ * (power1_cap).  Returns 0 on success or -EOPNOTSUPP for any other @attr.
+ */
+static int
+i915_power_write(struct drm_i915_private *i915, u32 attr, int chan, long val)
+{
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	struct intel_uncore *uncore = &i915->uncore;
+
+	switch (attr) {
+	case hwmon_power_max:
+		_field_scale_and_write(uncore,
+				       hwmon->rg.pkg_rapl_limit,
+				       PKG_PWR_LIM_1,
+				       hwmon->scl_shift_power,
+				       SF_POWER, val);
+		return 0;
+	case hwmon_power_cap:
+		_field_scale_and_write(uncore,
+				       hwmon->rg.pkg_rapl_limit_udw,
+				       PKG_PWR_LIM_2,
+				       hwmon->scl_shift_power,
+				       SF_POWER, val);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * i915_is_visible - hwmon_ops.is_visible callback.
+ * Dispatches on the sensor @type; only hwmon_power is supported, every other
+ * type is hidden (0).
+ */
+static umode_t
+i915_is_visible(const void *data, enum hwmon_sensor_types type,
+		u32 attr, int channel)
+{
+	/*
+	 * @data is the drvdata given to hwmon_device_register_with_info(),
+	 * i.e. the struct drm_device, so convert it with to_i915() instead of
+	 * casting it straight to struct drm_i915_private.
+	 */
+	struct drm_i915_private *i915 = to_i915((struct drm_device *)data);
+
+	switch (type) {
+	case hwmon_power:
+		return i915_power_is_visible(i915, attr, channel);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * i915_read - hwmon_ops.read callback.
+ * Dispatches on the sensor @type; only hwmon_power is supported.
+ * Returns the result of i915_power_read() or -EOPNOTSUPP for other types.
+ */
+static int
+i915_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	  int channel, long *val)
+{
+	/*
+	 * Derive i915 from the drvdata registered in i915_hwmon_init() (the
+	 * struct drm_device), the same way the other callbacks in this file
+	 * do.
+	 * NOTE(review): kdev_to_i915() was used here before; confirm it was
+	 * not intended for a different kind of device than @dev.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+
+	switch (type) {
+	case hwmon_power:
+		return i915_power_read(i915, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * i915_write - hwmon_ops.write callback.
+ * Dispatches on the sensor @type; only hwmon_power is supported.
+ * Returns the result of i915_power_write() or -EOPNOTSUPP for other types.
+ */
+static int
+i915_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	   int channel, long val)
+{
+	/*
+	 * Derive i915 from the drvdata registered in i915_hwmon_init() (the
+	 * struct drm_device), the same way the other callbacks in this file
+	 * do.
+	 * NOTE(review): kdev_to_i915() was used here before; confirm it was
+	 * not intended for a different kind of device than @dev.
+	 */
+	struct drm_i915_private *i915 = to_i915(dev_get_drvdata(dev));
+
+	switch (type) {
+	case hwmon_power:
+		return i915_power_write(i915, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Callbacks used by the hwmon core for the attributes listed in i915_info. */
+static const struct hwmon_ops i915_hwmon_ops = {
+	.is_visible = i915_is_visible,
+	.read = i915_read,
+	.write = i915_write,
+};
+
+/* Chip description passed to hwmon_device_register_with_info(). */
+static const struct hwmon_chip_info i915_chip_info = {
+	.ops = &i915_hwmon_ops,
+	.info = i915_info,
+};
+
+/*
+ * i915_hwmon_get_preregistration_info - fill in i915->hwmon before the hwmon
+ * device is registered.
+ *
+ * Selects the platform's register set (everything is invalid except on DG1),
+ * picks the energy status register, caches the unit shifts from
+ * pkg_power_sku_unit, takes the first energy counter sample and records the
+ * initial power1_max value.  Registers that are not valid on the platform
+ * are never read; the corresponding values are set to 0 instead.
+ */
+static void
+i915_hwmon_get_preregistration_info(struct drm_i915_private *i915)
+{
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	struct intel_uncore *uncore = &i915->uncore;
+	intel_wakeref_t wakeref;
+	u32 val_sku_unit;
+	__le32 le_sku_unit;
+
+	if (IS_DG1(i915)) {
+		hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
+		hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
+		hwmon->rg.pkg_energy_status = PCU_PACKAGE_ENERGY_STATUS;
+		hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
+		hwmon->rg.pkg_rapl_limit_udw = PCU_PACKAGE_RAPL_LIMIT_UDW;
+		hwmon->rg.plt_energy_status = PCU_PLATFORM_ENERGY_STATUS;
+	} else {
+		hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
+		hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
+		hwmon->rg.pkg_energy_status = INVALID_MMIO_REG;
+		hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
+		hwmon->rg.pkg_rapl_limit_udw = INVALID_MMIO_REG;
+		hwmon->rg.plt_energy_status = INVALID_MMIO_REG;
+	}
+
+	/*
+	 * If a platform does not support *_PLATFORM_ENERGY_STATUS,
+	 * try *PACKAGE_ENERGY_STATUS.
+	 */
+	if (i915_mmio_reg_valid(hwmon->rg.plt_energy_status))
+		hwmon->rg.reg_energy_status = hwmon->rg.plt_energy_status;
+	else
+		hwmon->rg.reg_energy_status = hwmon->rg.pkg_energy_status;
+
+	wakeref = intel_runtime_pm_get(uncore->rpm);
+
+	/*
+	 * The contents of register hwmon->rg.pkg_power_sku_unit do not change,
+	 * so read it once and store the shift values.
+	 */
+	if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
+		val_sku_unit = intel_uncore_read(uncore,
+						 hwmon->rg.pkg_power_sku_unit);
+	else
+		val_sku_unit = 0;
+
+	hwmon->energy_counter_overflow = 0;
+
+	/* First sample: the baseline for wrap-around detection. */
+	if (i915_mmio_reg_valid(hwmon->rg.reg_energy_status))
+		hwmon->energy_counter_prev =
+			intel_uncore_read(uncore, hwmon->rg.reg_energy_status);
+	else
+		hwmon->energy_counter_prev = 0;
+
+	intel_runtime_pm_put(uncore->rpm, wakeref);
+
+	le_sku_unit = cpu_to_le32(val_sku_unit);
+	hwmon->scl_shift_power = le32_get_bits(le_sku_unit, PKG_PWR_UNIT);
+	hwmon->scl_shift_energy = le32_get_bits(le_sku_unit, PKG_ENERGY_UNIT);
+	hwmon->scl_shift_time = le32_get_bits(le_sku_unit, PKG_TIME_UNIT);
+
+	/*
+	 * There is no direct way to obtain the power default_limit.
+	 * The best known workaround is to use the initial value of power1_max.
+	 *
+	 * The value of power1_max is reset to the default on reboot, but is
+	 * not reset by a module unload/load sequence.  To allow proper
+	 * functioning after a module reload, the value for power1_max is
+	 * restored to its original value at module unload time in
+	 * i915_hwmon_fini().
+	 *
+	 * Without a valid pkg_rapl_limit register there is nothing to read
+	 * (or to restore later), so record 0; i915_hwmon_fini() skips the
+	 * restore when the recorded value is 0.
+	 */
+	if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+		hwmon->power_max_initial_value =
+			(u32)_field_read_and_scale(uncore,
+						   hwmon->rg.pkg_rapl_limit,
+						   PKG_PWR_LIM_1,
+						   hwmon->scl_shift_power,
+						   SF_POWER);
+	else
+		hwmon->power_max_initial_value = 0;
+}
+
+/*
+ * i915_hwmon_init - set up the hwmon state and register the hwmon device.
+ * The struct drm_device is registered as the hwmon drvdata.
+ * Returns 0 on success, or the error code from the hwmon core, in which case
+ * the lock is destroyed again and hwmon->dev is left unset.
+ */
+int i915_hwmon_init(struct drm_device *drm_dev)
+{
+	struct drm_i915_private *i915 = to_i915(drm_dev);
+	struct i915_hwmon *hwmon = &i915->hwmon;
+	struct device *registered;
+
+	mutex_init(&hwmon->hwmon_lock);
+
+	i915_hwmon_get_preregistration_info(i915);
+
+	registered = hwmon_device_register_with_info(drm_dev->dev, "i915",
+						     drm_dev,
+						     &i915_chip_info,
+						     hwmon_groups);
+	if (!IS_ERR(registered)) {
+		hwmon->dev = registered;
+		return 0;
+	}
+
+	mutex_destroy(&hwmon->hwmon_lock);
+
+	return PTR_ERR(registered);
+}
+
+/*
+ * i915_hwmon_fini - unregister the hwmon device and undo i915_hwmon_init().
+ *
+ * Does nothing if no hwmon device is registered: when registration failed,
+ * i915_hwmon_init() has already destroyed hwmon_lock, so the restore below
+ * (which takes that lock) must not run.
+ * NOTE(review): this assumes hwmon->dev is NULL until a successful init,
+ * i.e. that struct drm_i915_private starts out zeroed - confirm.
+ *
+ * The device is unregistered before power1_max is restored so that no sysfs
+ * write can change the limit again after it has been put back.
+ */
+void i915_hwmon_fini(struct drm_device *drm_dev)
+{
+	struct drm_i915_private *i915 = to_i915(drm_dev);
+	struct i915_hwmon *hwmon = &i915->hwmon;
+
+	if (!hwmon->dev)
+		return;
+
+	hwmon_device_unregister(hwmon->dev);
+
+	if (hwmon->power_max_initial_value) {
+		/* Restore power1_max. */
+		_field_scale_and_write(&i915->uncore, hwmon->rg.pkg_rapl_limit,
+				       PKG_PWR_LIM_1, hwmon->scl_shift_power,
+				       SF_POWER,
+				       hwmon->power_max_initial_value);
+	}
+
+	mutex_destroy(&hwmon->hwmon_lock);
+
+	/* Leave the state zeroed so that a later init starts from scratch. */
+	memset(hwmon, 0, sizeof(*hwmon));
+}
diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
new file mode 100644
index 0000000000000..0be919f0a463d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_hwmon.h
@@ -0,0 +1,41 @@ 
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __INTEL_HWMON_H__
+#define __INTEL_HWMON_H__
+
+#include <drm/drm_device.h>
+#include "i915_reg.h"
+
+/*
+ * Per-platform register set used by the hwmon code.  A register that does
+ * not exist on the platform is stored as INVALID_MMIO_REG and is checked
+ * with i915_mmio_reg_valid() before use.
+ */
+struct i915_hwmon_reg {
+	i915_reg_t pkg_power_sku_unit;	/* unit exponents for power/energy/time */
+	i915_reg_t pkg_power_sku;	/* 64-bit; source of PKG_MIN_PWR/PKG_MAX_PWR */
+	i915_reg_t pkg_energy_status;	/* package energy counter */
+	i915_reg_t pkg_rapl_limit;	/* PKG_PWR_LIM_1* fields (power1_max) */
+	i915_reg_t pkg_rapl_limit_udw;	/* PKG_PWR_LIM_2* fields (power1_cap) */
+	i915_reg_t plt_energy_status;	/* platform energy counter */
+	i915_reg_t reg_energy_status;	/* platform counter if valid, else package */
+};
+
+/* hwmon state embedded in struct drm_i915_private. */
+struct i915_hwmon {
+	struct device *dev;		/* registered hwmon device */
+	struct mutex hwmon_lock;	/* counter overflow logic and rmw */
+
+	struct i915_hwmon_reg rg;
+
+	u32 energy_counter_overflow;	/* observed wraps of the energy counter */
+	u32 energy_counter_prev;	/* previous raw energy counter sample */
+	u32 power_max_initial_value;	/* scaled power1_max captured at init */
+
+	/* Shift counts taken from the pkg_power_sku_unit register. */
+	int scl_shift_power;
+	int scl_shift_energy;
+	int scl_shift_time;
+};
+
+/* Register the hwmon device; returns 0 or a negative error code. */
+int i915_hwmon_init(struct drm_device *drm_dev);
+/* Restore power1_max, unregister the hwmon device and clear the state. */
+void i915_hwmon_fini(struct drm_device *drm_dev);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f80d656331f42..62fccf71ddad6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4071,6 +4071,59 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define BXT_RP_STATE_CAP        _MMIO(0x138170)
 #define GEN9_RP_STATE_LIMITS	_MMIO(0x138148)
 
+/* DG1: PCU power/energy registers */
+
+/* based on MCHBAR_MIRROR_BASE_SNB == 0x140000 */
+#define PCU_PACKAGE_POWER_SKU_UNIT	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5938)
+#define PCU_PACKAGE_ENERGY_STATUS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x593c)
+#define PCU_PACKAGE_RAPL_LIMIT		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
+#define PCU_PACKAGE_RAPL_LIMIT_UDW	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a4)
+#define PCU_PACKAGE_POWER_SKU		INVALID_MMIO_REG	/* no offset defined here */
+#define PCU_PLATFORM_ENERGY_STATUS	INVALID_MMIO_REG	/* no offset defined here */
+
+/* Fields for *_PACKAGE_RAPL_LIMIT: */
+#define   PKG_PWR_LIM_1			REG_GENMASK(14, 0)
+#define   PKG_PWR_LIM_1_EN		REG_BIT(15)
+#define   PKG_PWR_LIM_1_TIME		REG_GENMASK(23, 17)
+
+/*
+ * Fields for *_PACKAGE_RAPL_LIMIT_UDW:
+ * In docs, these fields may be defined relative to the entire 64-bit
+ * register, but here they are defined relative to the 32-bit boundary.
+ */
+#define   PKG_PWR_LIM_2			REG_GENMASK(14, 0)	/* 64-bit view: 46:32 */
+#define   PKG_PWR_LIM_2_EN		REG_BIT(15)		/* 64-bit view: 47 */
+#define   PKG_PWR_LIM_2_TIME		REG_GENMASK(23, 17)	/* 64-bit view: 55:49 */
+
+/*
+ * *_PACKAGE_POWER_SKU_UNIT - fields specifying scaling for PCU quantities.
+ * Each field is an exponent n; one unit is the base unit divided by 2^n.
+ * All three units are used for the power control registers:
+ * - PKG_PWR_UNIT    - power unit,  1 W / 2^PKG_PWR_UNIT.
+ * - PKG_ENERGY_UNIT - energy unit, 1 J / 2^PKG_ENERGY_UNIT.
+ * - PKG_TIME_UNIT   - time unit,   1 s / 2^PKG_TIME_UNIT.
+ * Example: PKG_PWR_UNIT == 3 gives a power unit of 1/8 W.
+ */
+#define   PKG_PWR_UNIT			REG_GENMASK(3, 0)
+#define   PKG_ENERGY_UNIT		REG_GENMASK(12, 8)
+#define   PKG_TIME_UNIT			REG_GENMASK(19, 16)
+
+/*
+ * *_PACKAGE_POWER_SKU - SKU power and timing parameters.
+ * Used herein as a 64-bit register.
+ * These masks are defined using GENMASK_ULL as REG_GENMASK is limited to u32
+ * and as GENMASK is "long" and therefore 32-bits on a 32-bit system.
+ * PKG_PKG_TDP, PKG_MIN_PWR, and PKG_MAX_PWR are scaled in the same way as
+ * PKG_PWR_LIM_*, above.
+ * PKG_MAX_WIN has the value 1.x * 2^y: y is the low 5 bits, x the top 2 bits.
+ */
+#define   PKG_PKG_TDP			GENMASK_ULL(14, 0)
+#define   PKG_MIN_PWR			GENMASK_ULL(30, 16)
+#define   PKG_MAX_PWR			GENMASK_ULL(46, 32)
+#define   PKG_MAX_WIN			GENMASK_ULL(54, 48)
+#define     PKG_MAX_WIN_Y		GENMASK_ULL(52, 48)
+#define     PKG_MAX_WIN_X		GENMASK_ULL(54, 53)
+
 /*
  * Logical Context regs
  */