diff mbox series

[v4,6/6] hwmon: (k10temp) Add debugfs support

Message ID 20200122160800.12560-7-linux@roeck-us.net
State Superseded
Headers show
Series hwmon: k10temp driver improvements | expand

Commit Message

Guenter Roeck Jan. 22, 2020, 4:08 p.m. UTC
Show thermal and SVI registers for Family 17h CPUs.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/k10temp.c | 78 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

Comments

Ken Moffat Jan. 24, 2020, 12:01 a.m. UTC | #1
Hi Guenter,

you asked elsewhere for debugfs files from machines with embedded
graphics. I've pasted diffs below (idle,load) from my 3400G ('Picasso' APU).

On Wed, 22 Jan 2020 at 16:08, Guenter Roeck <linux@roeck-us.net> wrote:
>
> Show thermal and SVI registers for Family 17h CPUs.
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
>  drivers/hwmon/k10temp.c | 78 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 77 insertions(+), 1 deletion(-)
>
[snipping here for brevity]

--- svi-idle    2020-01-23 23:27:36.576177896 +0000
+++ svi-load    2020-01-23 23:33:05.342392957 +0000
@@ -1,8 +1,8 @@
-0x05a000: 0000000e 0000000e 00000002 01710000
-0x05a010: 014a0010 00000000 0000000e 00000000
-0x05a020: 00000000 00000000 00000080 005f0000
+0x05a000: 0000000e 0000000e 00000002 011f002e
+0x05a010: 014a0017 00000000 0000000e 00000000
+0x05a020: 00000000 00000000 00000080 001a0000
 0x05a030: 00000000 00000000 00000021 00000000
-0x05a040: 00000000 00000000 00000000 5f000000
+0x05a040: 00000000 00000000 00000000 1a000000
 0x05a050: 68000000 48000000 00000000 0000030a
 0x05a060: 00000007 00000000 80000002 80000002
 0x05a070: 80000041 00000001 00000008 00000000

--- thm-idle    2020-01-23 23:27:51.969229368 +0000
+++ thm-load    2020-01-23 23:33:19.779445923 +0000
@@ -1,15 +1,15 @@
-0x059800: 24200fef 00ff1001 00002921 000f4240
+0x059800: 3f800fef 00ff1001 00002921 000f4240
 0x059810: 800000f9 00000000 00000000 00000000
 0x059820: 00000000 00000000 00000000 0fff0078
-0x059830: 00000000 0029ccdf 0029acde 002a2ce2
-0x059840: 002a4ce3 002a0ce1 002a0ce1 002a6ce4
-0x059850: 0029ece0 0029ece0 002a0ce1 002a0ce1
-0x059860: 0029acde 002a8ce5 0029ece0 0029acde
-0x059870: 00298cdd 0029ece0 002a8ce5 002a4ce3
-0x059880: 0029ccdf 002a8ce5 0029acde 00296cdc
-0x059890: 002a4ce3 00296cdc 0029ece0 0029acde
-0x0598a0: 00294cdb 0029ece0 00294cdb 00298cdd
-0x0598b0: 0029acde 00000000 00002100 ffffffff
+0x059830: 00000000 0030cd17 002e8d05 002f4d0b
+0x059840: 00338d2c 0032cd26 00314d1b 0034cd36
+0x059850: 002d8cfd 002e2d02 00300d11 002eed08
+0x059860: 002dccff 002fcd0f 002d4cfb 002e0d01
+0x059870: 002ded00 002f2d0a 00346d33 00344d32
+0x059880: 002f8d0d 00346d33 002f4d0b 0030cd17
+0x059890: 00344d32 00302d12 0031ed20 00386d53
+0x0598a0: 00392d59 0036ad45 0036ed47 0034ad35
+0x0598b0: 0034ad35 00000000 00002100 ffffffff
 0x0598c0: 00000000 00000000 00000000 00000000
 0x0598d0: 00000000 00000000 00000000 00000000
 0x0598e0: 00000000 00000000 00000000 00000000
@@ -20,15 +20,15 @@
 0x059930: 00000000 00000000 00000000 00000000
 0x059940: 00000000 00000000 00000000 00000000
 0x059950: 00000000 00000000 00000000 00000000
-0x059960: 00000000 08400001 00004623 00000039
+0x059960: 00000000 08400001 00008241 00000045
 0x059970: c0800005 30c8680e 00024068 00000000
 0x059980: 00000000 00000000 00000000 00000000
 0x059990: 00000000 00000000 00000000 00000000
 0x0599a0: 00000000 00000000 00000000 00000000
 0x0599b0: 00000000 00000000 00000000 00000000
-0x0599c0: 00000060 000002a8 0000000c 00000294
-0x0599d0: 0000001b 00000000 00000000 000002a8
-0x0599e0: 0000000c 00000000 00000000 00000001
+0x0599c0: 00000060 00000392 0000001b 000002d4
+0x0599d0: 0000000d 00000000 00000000 00000392
+0x0599e0: 0000001b 00000000 00000000 00000001
 0x0599f0: 00000000 00010003 00000000 00000000
 0x059a00: 00000000 00000000 00000000 00000000
 0x059a10: 0000000e 00000000 00000003 00000000

and the accompanying human-readable sensor output
(these were not all taken at the exact same moment)

--- k10-idle 2020-01-23 23:25:32.020740997 +0000
+++ k10-load 2020-01-23 23:33:01.305378146 +0000
@@ -1,15 +1,15 @@
 k10temp-pci-00c3
 Adapter: PCI adapter
-Vcore:        +0.96 V
-Vsoc:         +1.09 V
-Tdie:         +36.9°C
-Tctl:         +36.9°C
-Icore:        +2.00 A
-Isoc:         +5.75 A
+Vcore:        +1.34 V
+Vsoc:         +1.08 V
+Tdie:         +62.5°C
+Tctl:         +62.5°C
+Icore:       +56.00 A
+Isoc:         +6.75 A

 amdgpu-pci-0900
 Adapter: PCI adapter
 vddgfx:           N/A
 vddnb:            N/A
-edge:         +36.0°C  (crit = +80.0°C, hyst =  +0.0°C)
+edge:         +62.0°C  (crit = +80.0°C, hyst =  +0.0°C)

Hope this is not a waste of your time.
Would you like similar for the 2500u ?

ĸen
Guenter Roeck Jan. 24, 2020, 4:47 a.m. UTC | #2
Hi Ken,

On 1/23/20 4:01 PM, Ken Moffat wrote:
> Hi Guenter,
> 

Thanks a lot for the additional information. The following
is interesting.

> -0x059960: 00000000 08400001 00004623 00000039
> +0x059960: 00000000 08400001 00008241 00000045

The last two blocks are also temperatures. In the AMD thermal code,
we find definitions for CG_MULT_THERMAL_STATUS and
CG_THERMAL_RANGE. The first consists of 2 x 9 bit (0x23
and 0x41 above for idle and under load), the second is just
a value. On Zen2, the address for those values is 20 higher
(0x05997c instead of 0x059968), but the numbers are pretty
much the same. The AMD thermal code reads those values for
some graphics chips and displays it directly in degrees C.

I am just not sure what exactly it represents. I see those
temperatures on 3900X as well. Actually, it looks like all
chips report them, including server chips, so it is not the
graphics temperature. But it is definitely worth keeping an eye
on it; maybe someone can figure out what it is.

> Hope this is not a waste of your time.

No, it is definitely worth it. It will give me data to work with
in the future.

> Would you like similar for the 2500u ?
> 
Yes, that would be great.

Thanks,
Guenter
diff mbox series

Patch

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 4a470b5195ee..5e3f43594084 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -26,6 +26,7 @@ 
  */
 
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/err.h>
 #include <linux/hwmon.h>
 #include <linux/init.h>
@@ -442,6 +443,76 @@  static bool has_erratum_319(struct pci_dev *pdev)
 	       (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2);
 }
 
+#ifdef CONFIG_DEBUG_FS
+/* Dump count 32-bit SMN registers starting at addr, four per output line. */
+static void k10temp_smn_regs_show(struct seq_file *s, struct pci_dev *pdev,
+				  u32 addr, int count)
+{
+	u32 reg; /* NOTE(review): amd_smn_read() result is not checked - confirm */
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (!(i & 3)) /* first register of a row: print its address */
+			seq_printf(s, "0x%06x: ", addr + i * 4);
+		amd_smn_read(amd_pci_dev_to_node_id(pdev), addr + i * 4, &reg);
+		seq_printf(s, "%08x ", reg);
+		if ((i & 3) == 3) /* fourth register of a row: end the line */
+			seq_puts(s, "\n");
+	}
+}
+/* Show handler for the svi debugfs file: dumps 32 registers at F17H_M01H_SVI. */
+static int svi_show(struct seq_file *s, void *unused)
+{
+	struct k10temp_data *data = s->private; /* set when the file is created */
+
+	k10temp_smn_regs_show(s, data->pdev, F17H_M01H_SVI, 32);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(svi); /* defines svi_fops */
+/* Show handler for the thm debugfs file: dumps 256 thermal SMN registers. */
+static int thm_show(struct seq_file *s, void *unused)
+{
+	struct k10temp_data *data = s->private; /* set when the file is created */
+
+	k10temp_smn_regs_show(s, data->pdev,
+			      F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, 256);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(thm); /* defines thm_fops */
+/* devm action: remove the debugfs directory ddir and everything below it. */
+static void k10temp_debugfs_cleanup(void *ddir)
+{
+	debugfs_remove_recursive(ddir);
+}
+/* Create the k10temp-<pci name> debugfs directory with its svi and thm files. */
+static void k10temp_init_debugfs(struct k10temp_data *data)
+{
+	struct dentry *debugfs;
+	char name[32]; /* directory name; scnprintf() below bounds it to this size */
+
+	/* Only show debugfs data for Family 17h/18h CPUs */
+	if (!data->show_tdie)
+		return;
+
+	scnprintf(name, sizeof(name), "k10temp-%s", pci_name(data->pdev));
+
+	debugfs = debugfs_create_dir(name, NULL);
+	if (debugfs) { /* NOTE(review): failure may be ERR_PTR, not NULL - confirm */
+		debugfs_create_file("svi", 0444, debugfs, data, &svi_fops);
+		debugfs_create_file("thm", 0444, debugfs, data, &thm_fops);
+		devm_add_action_or_reset(&data->pdev->dev,
+					 k10temp_debugfs_cleanup, debugfs);
+	}
+}
+
+#else
+/* CONFIG_DEBUG_FS is disabled: there is no debugfs entry to set up. */
+static void k10temp_init_debugfs(struct k10temp_data *data)
+{
+}
+
+#endif
+
 static const struct hwmon_channel_info *k10temp_info[] = {
 	HWMON_CHANNEL_INFO(temp,
 			   HWMON_T_INPUT | HWMON_T_MAX |
@@ -553,7 +624,12 @@  static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	hwmon_dev = devm_hwmon_device_register_with_info(dev, "k10temp", data,
 							 &k10temp_chip_info,
 							 NULL);
-	return PTR_ERR_OR_ZERO(hwmon_dev);
+	if (IS_ERR(hwmon_dev))
+		return PTR_ERR(hwmon_dev);
+
+	k10temp_init_debugfs(data);
+
+	return 0;
 }
 
 static const struct pci_device_id k10temp_id_table[] = {