Message ID | 20230815045658.80494-12-michael.chan@broadcom.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | None | expand |
On Mon, Aug 14, 2023 at 09:56:57PM -0700, Michael Chan wrote: > From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> > > HWRM_TEMP_MONITOR_QUERY response now indicates various > threshold temperatures. Expose these threshold temperatures > through the hwmon sysfs. > Also, provide temp1_max_alarm through which the user can check > whether the threshold temperature has been reached or not. > > Example: > cat /sys/class/hwmon/hwmon3/temp1_input > 75000 > cat /sys/class/hwmon/hwmon3/temp1_max > 105000 > cat /sys/class/hwmon/hwmon3/temp1_max_alarm > 0 > > Cc: Jean Delvare <jdelvare@suse.com> > Cc: Guenter Roeck <linux@roeck-us.net> > Cc: linux-hwmon@vger.kernel.org > Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> > Signed-off-by: Michael Chan <michael.chan@broadcom.com> > --- > drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++ > .../net/ethernet/broadcom/bnxt/bnxt_hwmon.c | 71 +++++++++++++++++-- > 2 files changed, 73 insertions(+), 5 deletions(-) > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h > index 84cbcfa61bc1..43a07d84f815 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h > @@ -2013,6 +2013,7 @@ struct bnxt { > #define BNXT_FW_CAP_RING_MONITOR BIT_ULL(30) > #define BNXT_FW_CAP_DBG_QCAPS BIT_ULL(31) > #define BNXT_FW_CAP_PTP BIT_ULL(32) > + #define BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED BIT_ULL(33) > > u32 fw_dbg_cap; > > @@ -2185,7 +2186,13 @@ struct bnxt { > struct bnxt_tc_info *tc_info; > struct list_head tc_indr_block_list; > struct dentry *debugfs_pdev; > +#ifdef CONFIG_BNXT_HWMON > struct device *hwmon_dev; > + u8 warn_thresh_temp; > + u8 crit_thresh_temp; > + u8 fatal_thresh_temp; > + u8 shutdown_thresh_temp; > +#endif > enum board_idx board_idx; > }; > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c > index 20381b7b1d78..f5affac1169a 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c > @@ -34,6 +34,15 @@ static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp) > > if (temp) > *temp = resp->temp; > + > + if (resp->flags & TEMP_MONITOR_QUERY_RESP_FLAGS_THRESHOLD_VALUES_AVAILABLE) { > + if (!temp) > + bp->fw_cap |= BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED; The if statement seems unnecessary. If the flag was not set during initialization, the limit attributes won't be visible anyway, so it doesn't make a difference if it is set now or not. > + bp->warn_thresh_temp = resp->warn_threshold; > + bp->crit_thresh_temp = resp->critical_threshold; > + bp->fatal_thresh_temp = resp->fatal_threshold; > + bp->shutdown_thresh_temp = resp->shutdown_threshold; Are those temperatures expected to change during runtime ? If not it might make sense to only execute the entire if condition if temp == NULL to avoid unnecessary reassignments whenever the temperature is read. > + } > err: > hwrm_req_drop(bp, req); > return rc; > @@ -42,12 +51,30 @@ static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp) > static umode_t bnxt_hwmon_is_visible(const void *_data, enum hwmon_sensor_types type, > u32 attr, int channel) > { > + const struct bnxt *bp = _data; > + > if (type != hwmon_temp) > return 0; > > switch (attr) { > case hwmon_temp_input: > return 0444; > + case hwmon_temp_lcrit: > + case hwmon_temp_crit: > + case hwmon_temp_emergency: > + case hwmon_temp_lcrit_alarm: > + case hwmon_temp_crit_alarm: > + case hwmon_temp_emergency_alarm: > + if (~bp->fw_cap & BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED) Seems to me that if (!(bp->fw_cap & BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED)) would be much easier to understand. > + return 0; > + return 0444; > + /* Max temperature setting in NVM is optional */ > + case hwmon_temp_max: > + case hwmon_temp_max_alarm: > + if (~bp->fw_cap & BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED || > + !bp->shutdown_thresh_temp) > + return 0; Wrong use of the 'max' attribute. More on that below. > + return 0444; > default: > return 0; > } > @@ -66,6 +93,38 @@ static int bnxt_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 > if (!rc) > *val = temp * 1000; > return rc; > + case hwmon_temp_lcrit: > + *val = bp->warn_thresh_temp * 1000; > + return 0; > + case hwmon_temp_crit: > + *val = bp->crit_thresh_temp * 1000; > + return 0; > + case hwmon_temp_emergency: > + *val = bp->fatal_thresh_temp * 1000; > + return 0; > + case hwmon_temp_max: > + *val = bp->shutdown_thresh_temp * 1000; > + return 0; > + case hwmon_temp_lcrit_alarm: > + rc = bnxt_hwrm_temp_query(bp, &temp); > + if (!rc) > + *val = temp >= bp->warn_thresh_temp; That is wrong. lcrit is the _lower_ critical temperature, ie the temperature is critically low. This is not a "high temperature" alarm. > + return rc; > + case hwmon_temp_crit_alarm: > + rc = bnxt_hwrm_temp_query(bp, &temp); > + if (!rc) > + *val = temp >= bp->crit_thresh_temp; > + return rc; > + case hwmon_temp_emergency_alarm: > + rc = bnxt_hwrm_temp_query(bp, &temp); > + if (!rc) > + *val = temp >= bp->fatal_thresh_temp; > + return rc; > + case hwmon_temp_max_alarm: > + rc = bnxt_hwrm_temp_query(bp, &temp); > + if (!rc) > + *val = temp >= bp->shutdown_thresh_temp; Hmm, that isn't really the purpose of alarm attributes. The expectation would be that the chip sets alarm flags and the driver reports it. I guess there is some value in having it, so I won't object. Anyway, the ordering is wrong. max_alarm should be the lowest alarm level, followed by crit and emergency. So max_alarm -> temp >= bp->warn_thresh_temp crit_alarm -> temp >= bp->crit_thresh_temp emergency_alarm -> temp >= bp->fatal_thresh_temp or temp >= bp->shutdown_thresh_temp There are only three levels of upper temperature alarms. Abusing lcrit as 4th upper alarm is most definitely wrong. > + return rc; > default: > return -EOPNOTSUPP; > } > @@ -73,7 +132,11 @@ static int bnxt_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 > > static const struct hwmon_channel_info *bnxt_hwmon_info[] = { > HWMON_CHANNEL_INFO(temp, > - HWMON_T_INPUT), > + HWMON_T_INPUT | > + HWMON_T_MAX | HWMON_T_LCRIT | > + HWMON_T_CRIT | HWMON_T_EMERGENCY | > + HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM | > + HWMON_T_MAX_ALARM | HWMON_T_EMERGENCY_ALARM), > NULL > }; > > @@ -97,13 +160,11 @@ void bnxt_hwmon_uninit(struct bnxt *bp) > > void bnxt_hwmon_init(struct bnxt *bp) > { > - struct hwrm_temp_monitor_query_input *req; > struct pci_dev *pdev = bp->pdev; > int rc; > > - rc = hwrm_req_init(bp, req, HWRM_TEMP_MONITOR_QUERY); > - if (!rc) > - rc = hwrm_req_send_silent(bp, req); > + /* temp1_xxx is only sensor, ensure not registered if it will fail */ > + rc = bnxt_hwrm_temp_query(bp, NULL); Ah, that is the reason for the check in bnxt_hwrm_temp_query(). The check in that function should really be added here, not in the previous patch. > if (rc == -EACCES || rc == -EOPNOTSUPP) { > bnxt_hwmon_uninit(bp); > return; > -- > 2.30.1 >
On Wed, Aug 16, 2023 at 03:58:34PM +0530, Kalesh Anakkur Purayil wrote: > Thank you Guenter for the review and the suggestions. > > Please see my response inline. > > On Tue, Aug 15, 2023 at 8:35 PM Guenter Roeck <linux@roeck-us.net> wrote: > [ ... ] > > > > Hmm, that isn't really the purpose of alarm attributes. The expectation > > would be that the chip sets alarm flags and the driver reports it. > > I guess there is some value in having it, so I won't object. > > > > Anyway, the ordering is wrong. max_alarm should be the lowest > > alarm level, followed by crit and emergency. So > > max_alarm -> temp >= bp->warn_thresh_temp > > crit_alarm -> temp >= bp->crit_thresh_temp > > emergency_alarm -> temp >= bp->fatal_thresh_temp > > or temp >= bp->shutdown_thresh_temp > > > > There are only three levels of upper temperature alarms. > > Abusing lcrit as 4th upper alarm is most definitely wrong. > > > [Kalesh]: Thank you for the clarification. > bnxt_en driver wants to expose 4 threshold temperatures to the user through > hwmon sysfs. > 1. warning threshold temperature > 2. critical threshold temperature > 3. fatal threshold temperature > 4. shutdown threshold temperature > > I will use the following mapping: > > hwmon_temp_max : warning threshold temperature > hwmon_temp_crit : critical threshold temperature > hwmon_temp_emergency : fatal threshold temperature > > hwmon_temp_max_alarm : temp >= bp->warn_thresh_temp > hwmon_temp_crit_alarm : temp >= bp->crit_thresh_temp > hwmon_temp_emergency_alarm : temp >= bp->fatal_thresh_temp > > Is it OK to map the shutdown threshold temperature to "hwmon_temp_fault"? That is a flag, not a temperature, and it is intended to signal a problem ith the sensor. > If not, can you please suggest an alternative? > The only one I can think of is to add non-standard attributes such as temp1_shutdown and temp1_shutdown_alarm. Guenter
On Wed, Aug 16, 2023 at 09:42:17PM +0530, Kalesh Anakkur Purayil wrote: > On Wed, Aug 16, 2023 at 5:43 PM Guenter Roeck <linux@roeck-us.net> wrote: > > > On Wed, Aug 16, 2023 at 03:58:34PM +0530, Kalesh Anakkur Purayil wrote: > > > Thank you Guenter for the review and the suggestions. > > > > > > Please see my response inline. > > > > > > On Tue, Aug 15, 2023 at 8:35 PM Guenter Roeck <linux@roeck-us.net> > > wrote: > > > > > [ ... ] > > > > > > > > > > Hmm, that isn't really the purpose of alarm attributes. The expectation > > > > would be that the chip sets alarm flags and the driver reports it. > > > > I guess there is some value in having it, so I won't object. > > > > > > > > Anyway, the ordering is wrong. max_alarm should be the lowest > > > > alarm level, followed by crit and emergency. So > > > > max_alarm -> temp >= bp->warn_thresh_temp > > > > crit_alarm -> temp >= bp->crit_thresh_temp > > > > emergency_alarm -> temp >= bp->fatal_thresh_temp > > > > or temp >= bp->shutdown_thresh_temp > > > > > > > > There are only three levels of upper temperature alarms. > > > > Abusing lcrit as 4th upper alarm is most definitely wrong. > > > > > > > [Kalesh]: Thank you for the clarification. > > > bnxt_en driver wants to expose 4 threshold temperatures to the user > > through > > > hwmon sysfs. > > > 1. warning threshold temperature > > > 2. critical threshold temperature > > > 3. fatal threshold temperature > > > 4. shutdown threshold temperature > > > > > > I will use the following mapping: > > > > > > hwmon_temp_max : warning threshold temperature > > > hwmon_temp_crit : critical threshold temperature > > > hwmon_temp_emergency : fatal threshold temperature > > > > > > hwmon_temp_max_alarm : temp >= bp->warn_thresh_temp > > > hwmon_temp_crit_alarm : temp >= bp->crit_thresh_temp > > > hwmon_temp_emergency_alarm : temp >= bp->fatal_thresh_temp > > > > > > Is it OK to map the shutdown threshold temperature to "hwmon_temp_fault"? > > > > That is a flag, not a temperature, and it is intended to signal > > a problem ith the sensor. > > > > > If not, can you please suggest an alternative? > > > > > > > The only one I can think of is to add non-standard attributes > > such as temp1_shutdown and temp1_shutdown_alarm. > > > [Kalesh]: Sorry, I don't quite get this part. I was looking at the kernel > hwmon code, but could not find any reference. > It would be non-standard attributes, so, correct, there is no reference. > Can we add new attributes "shutdown" and "shutdown_alarm" for tempX? For > example: > > #define HWMON_T_SHUTDOWN BIT(hwmon_temp_shutdown) > Not for a single driver. You can implement the sysfs attributes directly in the driver and pass an extra attribute group to the hwmon core when registering the hwmon device. Guenter
On Wed, Aug 16, 2023 at 12:25 PM Guenter Roeck <linux@roeck-us.net> wrote: > > On Wed, Aug 16, 2023 at 09:42:17PM +0530, Kalesh Anakkur Purayil wrote: > > [Kalesh]: Sorry, I don't quite get this part. I was looking at the kernel > > hwmon code, but could not find any reference. > > > > It would be non-standard attributes, so, correct, there is no reference. > > > Can we add new attributes "shutdown" and "shutdown_alarm" for tempX? For > > example: > > > > #define HWMON_T_SHUTDOWN BIT(hwmon_temp_shutdown) > > > > Not for a single driver. You can implement the sysfs attributes > directly in the driver and pass an extra attribute group to the > hwmon core when registering the hwmon device. Thanks for the review. I will drop these hwmon patches for now and respin the others for v2. Kalesh will rework these hwmon patches.
On Wed, Aug 16, 2023 at 11:28:07PM -0700, Michael Chan wrote: > On Wed, Aug 16, 2023 at 12:25 PM Guenter Roeck <linux@roeck-us.net> wrote: > > > > On Wed, Aug 16, 2023 at 09:42:17PM +0530, Kalesh Anakkur Purayil wrote: > > > [Kalesh]: Sorry, I don't quite get this part. I was looking at the kernel > > > hwmon code, but could not find any reference. > > > > > > > It would be non-standard attributes, so, correct, there is no reference. > > > > > Can we add new attributes "shutdown" and "shutdown_alarm" for tempX? For > > > example: > > > > > > #define HWMON_T_SHUTDOWN BIT(hwmon_temp_shutdown) > > > > > > > Not for a single driver. You can implement the sysfs attributes > > directly in the driver and pass an extra attribute group to the > > hwmon core when registering the hwmon device. > > Thanks for the review. I will drop these hwmon patches for now and > respin the others for v2. Kalesh will rework these hwmon patches. Alternatively you could keep the standard attributes and add the shutdown attributes later. Guenter
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 84cbcfa61bc1..43a07d84f815 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2013,6 +2013,7 @@ struct bnxt { #define BNXT_FW_CAP_RING_MONITOR BIT_ULL(30) #define BNXT_FW_CAP_DBG_QCAPS BIT_ULL(31) #define BNXT_FW_CAP_PTP BIT_ULL(32) + #define BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED BIT_ULL(33) u32 fw_dbg_cap; @@ -2185,7 +2186,13 @@ struct bnxt { struct bnxt_tc_info *tc_info; struct list_head tc_indr_block_list; struct dentry *debugfs_pdev; +#ifdef CONFIG_BNXT_HWMON struct device *hwmon_dev; + u8 warn_thresh_temp; + u8 crit_thresh_temp; + u8 fatal_thresh_temp; + u8 shutdown_thresh_temp; +#endif enum board_idx board_idx; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c index 20381b7b1d78..f5affac1169a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c @@ -34,6 +34,15 @@ static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp) if (temp) *temp = resp->temp; + + if (resp->flags & TEMP_MONITOR_QUERY_RESP_FLAGS_THRESHOLD_VALUES_AVAILABLE) { + if (!temp) + bp->fw_cap |= BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED; + bp->warn_thresh_temp = resp->warn_threshold; + bp->crit_thresh_temp = resp->critical_threshold; + bp->fatal_thresh_temp = resp->fatal_threshold; + bp->shutdown_thresh_temp = resp->shutdown_threshold; + } err: hwrm_req_drop(bp, req); return rc; @@ -42,12 +51,30 @@ static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp) static umode_t bnxt_hwmon_is_visible(const void *_data, enum hwmon_sensor_types type, u32 attr, int channel) { + const struct bnxt *bp = _data; + if (type != hwmon_temp) return 0; switch (attr) { case hwmon_temp_input: return 0444; + case hwmon_temp_lcrit: + case hwmon_temp_crit: + case hwmon_temp_emergency: + case hwmon_temp_lcrit_alarm: + case hwmon_temp_crit_alarm: + case hwmon_temp_emergency_alarm: + if (~bp->fw_cap & BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED) + return 0; + return 0444; + /* Max temperature setting in NVM is optional */ + case hwmon_temp_max: + case hwmon_temp_max_alarm: + if (~bp->fw_cap & BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED || + !bp->shutdown_thresh_temp) + return 0; + return 0444; default: return 0; } @@ -66,6 +93,38 @@ static int bnxt_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 if (!rc) *val = temp * 1000; return rc; + case hwmon_temp_lcrit: + *val = bp->warn_thresh_temp * 1000; + return 0; + case hwmon_temp_crit: + *val = bp->crit_thresh_temp * 1000; + return 0; + case hwmon_temp_emergency: + *val = bp->fatal_thresh_temp * 1000; + return 0; + case hwmon_temp_max: + *val = bp->shutdown_thresh_temp * 1000; + return 0; + case hwmon_temp_lcrit_alarm: + rc = bnxt_hwrm_temp_query(bp, &temp); + if (!rc) + *val = temp >= bp->warn_thresh_temp; + return rc; + case hwmon_temp_crit_alarm: + rc = bnxt_hwrm_temp_query(bp, &temp); + if (!rc) + *val = temp >= bp->crit_thresh_temp; + return rc; + case hwmon_temp_emergency_alarm: + rc = bnxt_hwrm_temp_query(bp, &temp); + if (!rc) + *val = temp >= bp->fatal_thresh_temp; + return rc; + case hwmon_temp_max_alarm: + rc = bnxt_hwrm_temp_query(bp, &temp); + if (!rc) + *val = temp >= bp->shutdown_thresh_temp; + return rc; default: return -EOPNOTSUPP; } @@ -73,7 +132,11 @@ static int bnxt_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 static const struct hwmon_channel_info *bnxt_hwmon_info[] = { HWMON_CHANNEL_INFO(temp, - HWMON_T_INPUT), + HWMON_T_INPUT | + HWMON_T_MAX | HWMON_T_LCRIT | + HWMON_T_CRIT | HWMON_T_EMERGENCY | + HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM | + HWMON_T_MAX_ALARM | HWMON_T_EMERGENCY_ALARM), NULL }; @@ -97,13 +160,11 @@ void bnxt_hwmon_uninit(struct bnxt *bp) void bnxt_hwmon_init(struct bnxt *bp) { - struct hwrm_temp_monitor_query_input *req; struct pci_dev *pdev = bp->pdev; int rc; - rc = hwrm_req_init(bp, req, HWRM_TEMP_MONITOR_QUERY); - if (!rc) - rc = hwrm_req_send_silent(bp, req); + /* temp1_xxx is only sensor, ensure not registered if it will fail */ + rc = bnxt_hwrm_temp_query(bp, NULL); if (rc == -EACCES || rc == -EOPNOTSUPP) { bnxt_hwmon_uninit(bp); return;