Message ID | 20200123220422.30334-1-linux@roeck-us.net (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [v3] hwmon: (k10temp) Display up to eight sets of CCD temperatures | expand |
Sorry for the noise - please don't bother with this version. I'll send v4 in a minute, with improved support for older EPYC CPUs. Guenter On Thu, Jan 23, 2020 at 02:04:22PM -0800, Guenter Roeck wrote: > In HWiNFO, we see support for Tccd1, Tccd3, Tccd5, and Tccd7 temperature > sensors on Zen2 based Threadripper CPUs. Checking register maps on > Threadripper 3970X confirms SMN register addresses and values for those > sensors. > > Register values observed in an idle system: > > 0x059950: 00000000 00000abc 00000000 00000ad8 > 0x059960: 00000000 00000ade 00000000 00000ae4 > > Under load: > > 0x059950: 00000000 00000c02 00000000 00000c14 > 0x059960: 00000000 00000c30 00000000 00000c22 > > More analysis shows that EPYC CPUs support up to 8 CCD temperature > sensors. > > On top of that, in thm_10_0_sh_mask.h in the Linux kernel, we find > definitions for THM_DIE{1-3}_TEMP__VALID_MASK, set to 0x00000800, as well > as matching SMN addresses. This lets us conclude that bit 11 of the > respective registers is a valid bit. With this assumption, the temperature > offset is now 49 degrees C. This conveniently matches the documented > temperature offset for Tdie, again suggesting that above registers indeed > report temperatures sensor values. Assume that bit 11 is indeed a valid > bit, and add support for the additional sensors. > > With this patch applied, output from 3970X (idle) looks as follows: > > k10temp-pci-00c3 > Adapter: PCI adapter > Tdie: +55.9°C > Tctl: +55.9°C > Tccd1: +39.8°C > Tccd3: +43.8°C > Tccd5: +43.8°C > Tccd7: +44.8°C > > Tested-by: Michael Larabel <michael@phoronix.com> > Signed-off-by: Guenter Roeck <linux@roeck-us.net> > --- > v3: Increased number of supported CCD sensors to 8. > Dropped note that functionality on high-end CPUs is not known, > since that is no longer correct. > > v2: Added additional sensors to k10temp_info[]. > > drivers/hwmon/k10temp.c | 70 ++++++++++++++++++----------------------- > 1 file changed, 31 insertions(+), 39 deletions(-) > > diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c > index 5e3f43594084..1634cb6394f0 100644 > --- a/drivers/hwmon/k10temp.c > +++ b/drivers/hwmon/k10temp.c > @@ -7,7 +7,7 @@ > * Copyright (c) 2020 Guenter Roeck <linux@roeck-us.net> > * > * Implementation notes: > - * - CCD1 and CCD2 register address information as well as the calculation to > + * - CCD register address information as well as the calculation to > * convert raw register values is from https://github.com/ocerman/zenpower. > * The information is not confirmed from chip datasheets, but experiments > * suggest that it provides reasonable temperature values. > @@ -18,11 +18,6 @@ > * normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC > * current. Reported values can be adjusted using the sensors configuration > * file. > - * - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as > - * current and voltage information works on higher-end Ryzen CPUs. > - * Information reported by Windows tools suggests that additional sensors > - * (both temperature and voltage/current) are supported, but their register > - * location is currently unknown. > */ > > #include <linux/bitops.h> > @@ -80,8 +75,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); > > /* F17h M01h Access througn SMN */ > #define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800 > -#define F17H_M70H_CCD1_TEMP 0x00059954 > -#define F17H_M70H_CCD2_TEMP 0x00059958 > + > +#define F17H_M70H_CCD_TEMP(x) (0x00059954 + ((x) * 4)) > +#define F17H_M70H_CCD_TEMP_VALID BIT(11) > +#define F17H_M70H_CCD_TEMP_MASK GENMASK(10, 0) > > #define F17H_M01H_SVI 0x0005A000 > #define F17H_M01H_SVI_TEL_PLANE0 (F17H_M01H_SVI + 0xc) > @@ -100,8 +97,7 @@ struct k10temp_data { > int temp_offset; > u32 temp_adjust_mask; > bool show_tdie; > - bool show_tccd1; > - bool show_tccd2; > + u32 show_tccd; > u32 svi_addr[2]; > bool show_current; > int cfactor[2]; > @@ -188,6 +184,12 @@ const char *k10temp_temp_label[] = { > "Tctl", > "Tccd1", > "Tccd2", > + "Tccd3", > + "Tccd4", > + "Tccd5", > + "Tccd6", > + "Tccd7", > + "Tccd8", > }; > > const char *k10temp_in_label[] = { > @@ -277,15 +279,10 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel, > if (*val < 0) > *val = 0; > break; > - case 2: /* Tccd1 */ > - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), > - F17H_M70H_CCD1_TEMP, ®val); > - *val = (regval & 0xfff) * 125 - 305000; > - break; > - case 3: /* Tccd2 */ > + case 2 ... 9: /* Tccd{1-8} */ > amd_smn_read(amd_pci_dev_to_node_id(data->pdev), > - F17H_M70H_CCD2_TEMP, ®val); > - *val = (regval & 0xfff) * 125 - 305000; > + F17H_M70H_CCD_TEMP(channel - 2), ®val); > + *val = (regval & F17H_M70H_CCD_TEMP_MASK) * 125 - 49000; > break; > default: > return -EOPNOTSUPP; > @@ -343,12 +340,8 @@ static umode_t k10temp_is_visible(const void *_data, > if (!data->show_tdie) > return 0; > break; > - case 2: /* Tccd1 */ > - if (!data->show_tccd1) > - return 0; > - break; > - case 3: /* Tccd2 */ > - if (!data->show_tccd2) > + case 2 ... 9: /* Tccd{1-8} */ > + if (!(data->show_tccd & BIT(channel - 2))) > return 0; > break; > default: > @@ -382,12 +375,8 @@ static umode_t k10temp_is_visible(const void *_data, > case 0: /* Tdie */ > case 1: /* Tctl */ > break; > - case 2: /* Tccd1 */ > - if (!data->show_tccd1) > - return 0; > - break; > - case 3: /* Tccd2 */ > - if (!data->show_tccd2) > + case 2 ... 9: /* Tccd{1-8} */ > + if (!(data->show_tccd & BIT(channel - 2))) > return 0; > break; > default: > @@ -520,6 +509,12 @@ static const struct hwmon_channel_info *k10temp_info[] = { > HWMON_T_LABEL, > HWMON_T_INPUT | HWMON_T_LABEL, > HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > + HWMON_T_INPUT | HWMON_T_LABEL, > HWMON_T_INPUT | HWMON_T_LABEL), > HWMON_CHANNEL_INFO(in, > HWMON_I_INPUT | HWMON_I_LABEL, > @@ -595,15 +590,12 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) > data->cfactor[1] = CFACTOR_ISOC; > data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1; > data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0; > - amd_smn_read(amd_pci_dev_to_node_id(pdev), > - F17H_M70H_CCD1_TEMP, ®val); > - if (regval & 0xfff) > - data->show_tccd1 = true; > - > - amd_smn_read(amd_pci_dev_to_node_id(pdev), > - F17H_M70H_CCD2_TEMP, ®val); > - if (regval & 0xfff) > - data->show_tccd2 = true; > + for (i = 0; i < 8; i++) { > + amd_smn_read(amd_pci_dev_to_node_id(pdev), > + F17H_M70H_CCD_TEMP(i), ®val); > + if (regval & F17H_M70H_CCD_TEMP_VALID) > + data->show_tccd |= BIT(i); > + } > break; > } > } else { > -- > 2.17.1 >
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 5e3f43594084..1634cb6394f0 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -7,7 +7,7 @@ * Copyright (c) 2020 Guenter Roeck <linux@roeck-us.net> * * Implementation notes: - * - CCD1 and CCD2 register address information as well as the calculation to + * - CCD register address information as well as the calculation to * convert raw register values is from https://github.com/ocerman/zenpower. * The information is not confirmed from chip datasheets, but experiments * suggest that it provides reasonable temperature values. @@ -18,11 +18,6 @@ * normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC * current. Reported values can be adjusted using the sensors configuration * file. - * - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as - * current and voltage information works on higher-end Ryzen CPUs. - * Information reported by Windows tools suggests that additional sensors - * (both temperature and voltage/current) are supported, but their register - * location is currently unknown. */ #include <linux/bitops.h> @@ -80,8 +75,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); /* F17h M01h Access througn SMN */ #define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800 -#define F17H_M70H_CCD1_TEMP 0x00059954 -#define F17H_M70H_CCD2_TEMP 0x00059958 + +#define F17H_M70H_CCD_TEMP(x) (0x00059954 + ((x) * 4)) +#define F17H_M70H_CCD_TEMP_VALID BIT(11) +#define F17H_M70H_CCD_TEMP_MASK GENMASK(10, 0) #define F17H_M01H_SVI 0x0005A000 #define F17H_M01H_SVI_TEL_PLANE0 (F17H_M01H_SVI + 0xc) @@ -100,8 +97,7 @@ struct k10temp_data { int temp_offset; u32 temp_adjust_mask; bool show_tdie; - bool show_tccd1; - bool show_tccd2; + u32 show_tccd; u32 svi_addr[2]; bool show_current; int cfactor[2]; @@ -188,6 +184,12 @@ const char *k10temp_temp_label[] = { "Tctl", "Tccd1", "Tccd2", + "Tccd3", + "Tccd4", + "Tccd5", + "Tccd6", + "Tccd7", + "Tccd8", }; const char *k10temp_in_label[] = { @@ -277,15 +279,10 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel, if (*val < 0) *val = 0; break; - case 2: /* Tccd1 */ - amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - F17H_M70H_CCD1_TEMP, ®val); - *val = (regval & 0xfff) * 125 - 305000; - break; - case 3: /* Tccd2 */ + case 2 ... 9: /* Tccd{1-8} */ amd_smn_read(amd_pci_dev_to_node_id(data->pdev), - F17H_M70H_CCD2_TEMP, ®val); - *val = (regval & 0xfff) * 125 - 305000; + F17H_M70H_CCD_TEMP(channel - 2), ®val); + *val = (regval & F17H_M70H_CCD_TEMP_MASK) * 125 - 49000; break; default: return -EOPNOTSUPP; @@ -343,12 +340,8 @@ static umode_t k10temp_is_visible(const void *_data, if (!data->show_tdie) return 0; break; - case 2: /* Tccd1 */ - if (!data->show_tccd1) - return 0; - break; - case 3: /* Tccd2 */ - if (!data->show_tccd2) + case 2 ... 9: /* Tccd{1-8} */ + if (!(data->show_tccd & BIT(channel - 2))) return 0; break; default: @@ -382,12 +375,8 @@ static umode_t k10temp_is_visible(const void *_data, case 0: /* Tdie */ case 1: /* Tctl */ break; - case 2: /* Tccd1 */ - if (!data->show_tccd1) - return 0; - break; - case 3: /* Tccd2 */ - if (!data->show_tccd2) + case 2 ... 9: /* Tccd{1-8} */ + if (!(data->show_tccd & BIT(channel - 2))) return 0; break; default: @@ -520,6 +509,12 @@ static const struct hwmon_channel_info *k10temp_info[] = { HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, @@ -595,15 +590,12 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) data->cfactor[1] = CFACTOR_ISOC; data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1; data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0; - amd_smn_read(amd_pci_dev_to_node_id(pdev), - F17H_M70H_CCD1_TEMP, ®val); - if (regval & 0xfff) - data->show_tccd1 = true; - - amd_smn_read(amd_pci_dev_to_node_id(pdev), - F17H_M70H_CCD2_TEMP, ®val); - if (regval & 0xfff) - data->show_tccd2 = true; + for (i = 0; i < 8; i++) { + amd_smn_read(amd_pci_dev_to_node_id(pdev), + F17H_M70H_CCD_TEMP(i), ®val); + if (regval & F17H_M70H_CCD_TEMP_VALID) + data->show_tccd |= BIT(i); + } break; } } else {