diff mbox series

[v2,2/2] nvme: hwmon: add quirk to avoid changing temperature threshold

Message ID 1573746001-20979-3-git-send-email-akinobu.mita@gmail.com (mailing list archive)
State Not Applicable
Headers show
Series nvme: hwmon: provide temperature threshold features | expand

Commit Message

Akinobu Mita Nov. 14, 2019, 3:40 p.m. UTC
This adds a new quirk, NVME_QUIRK_NO_TEMP_THRESH_CHANGE, that prevents
the value of the temperature threshold feature from being changed on
specific devices that misbehave when the feature is written.

Guenter reported:

"On my Intel NVME drive (SSDPEKKW512G7), writing any minimum limit on the
Composite temperature sensor results in a temperature warning, and that
warning is sticky until I reset the controller.

It doesn't seem to matter which temperature I write; writing -273000 has
the same result."

The affected Intel NVMe drive already has the latest firmware version
installed, so this problem has not been fixed by any firmware update.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Jens Axboe <axboe@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
---
 drivers/nvme/host/nvme-hwmon.c | 6 +++++-
 drivers/nvme/host/nvme.h       | 5 +++++
 drivers/nvme/host/pci.c        | 3 ++-
 3 files changed, 12 insertions(+), 2 deletions(-)

Comments

Guenter Roeck Nov. 14, 2019, 7:14 p.m. UTC | #1
On Fri, Nov 15, 2019 at 12:40:01AM +0900, Akinobu Mita wrote:
> This adds a new quirk NVME_QUIRK_NO_TEMP_THRESH_CHANGE to avoid changing
> the value of the temperature threshold feature for specific devices that
> show undesirable behavior.
> 
> Guenter reported:
> 
> "On my Intel NVME drive (SSDPEKKW512G7), writing any minimum limit on the
> Composite temperature sensor results in a temperature warning, and that
> warning is sticky until I reset the controller.
> 
> It doesn't seem to matter which temperature I write; writing -273000 has
> the same result."
> 
> The Intel NVMe has the latest firmware version installed, so this isn't
> a problem that was ever fixed.
> 
> Reported-by: Guenter Roeck <linux@roeck-us.net>
> Cc: Keith Busch <kbusch@kernel.org>
> Cc: Jens Axboe <axboe@fb.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Sagi Grimberg <sagi@grimberg.me>
> Cc: Jean Delvare <jdelvare@suse.com>
> Cc: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>

Tested on:
	INTEL SSDPEKKW512G7

Specifically verified that min/max attributes are indeed read-only
on this drive.

nvme-pci-0100
Adapter: PCI adapter
Composite:    +32.9°C  (low  = -273.1°C, high = +69.8°C)
                       (crit = +79.8°C)

groeck@jupiter:/sys/class/hwmon/hwmon0$ ls -l
total 0
lrwxrwxrwx 1 root root    0 Nov 14 10:59 device -> ../../../0000:01:00.0
-r--r--r-- 1 root root 4096 Nov 14 10:59 name
drwxr-xr-x 2 root root    0 Nov 14 10:59 power
lrwxrwxrwx 1 root root    0 Nov 14 10:59 subsystem -> ../../../../../../class/hwmon
-r--r--r-- 1 root root 4096 Nov 14 10:59 temp1_alarm
-r--r--r-- 1 root root 4096 Nov 14 10:59 temp1_crit
-r--r--r-- 1 root root 4096 Nov 14 10:59 temp1_input
-r--r--r-- 1 root root 4096 Nov 14 10:59 temp1_label
-r--r--r-- 1 root root 4096 Nov 14 10:59 temp1_max
-r--r--r-- 1 root root 4096 Nov 14 10:59 temp1_min
-rw-r--r-- 1 root root 4096 Nov 14 10:59 uevent

groeck@jupiter:/sys/class/hwmon/hwmon0$ sudo nvme list
Node             SN                   Model                                    Namespace Usage                      Format           FW Rev  
---------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------
/dev/nvme0n1     BTPY65250EQN512F     INTEL SSDPEKKW512G7                      1         512.11  GB / 512.11  GB    512   B +  0 B    PSF121C

> ---
>  drivers/nvme/host/nvme-hwmon.c | 6 +++++-
>  drivers/nvme/host/nvme.h       | 5 +++++
>  drivers/nvme/host/pci.c        | 3 ++-
>  3 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/nvme-hwmon.c b/drivers/nvme/host/nvme-hwmon.c
> index 97a84b4..a5af21f 100644
> --- a/drivers/nvme/host/nvme-hwmon.c
> +++ b/drivers/nvme/host/nvme-hwmon.c
> @@ -170,8 +170,12 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
>  	case hwmon_temp_max:
>  	case hwmon_temp_min:
>  		if ((!channel && data->ctrl->wctemp) ||
> -		    (channel && data->log.temp_sensor[channel - 1]))
> +		    (channel && data->log.temp_sensor[channel - 1])) {
> +			if (data->ctrl->quirks &
> +			    NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
> +				return 0444;
>  			return 0644;
> +		}
>  		break;
>  	case hwmon_temp_alarm:
>  		if (!channel)
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 000a3d9..19e5e87 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -115,6 +115,11 @@ enum nvme_quirks {
>  	 * Prevent tag overlap between queues
>  	 */
>  	NVME_QUIRK_SHARED_TAGS                  = (1 << 13),
> +
> +	/*
> +	 * Don't change the value of the temperature threshold feature
> +	 */
> +	NVME_QUIRK_NO_TEMP_THRESH_CHANGE	= (1 << 14),
>  };
>  
>  /*
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 931d4a9..2c0206b 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -3529,7 +3529,8 @@ static const struct pci_device_id nvme_id_table[] = {
>  				NVME_QUIRK_DEALLOCATE_ZEROES, },
>  	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
>  		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
> -				NVME_QUIRK_MEDIUM_PRIO_SQ },
> +				NVME_QUIRK_MEDIUM_PRIO_SQ |
> +				NVME_QUIRK_NO_TEMP_THRESH_CHANGE },
>  	{ PCI_VDEVICE(INTEL, 0xf1a6),	/* Intel 760p/Pro 7600p */
>  		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
>  	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
> -- 
> 2.7.4
>
Christoph Hellwig Nov. 20, 2019, 6:48 p.m. UTC | #2
Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/drivers/nvme/host/nvme-hwmon.c b/drivers/nvme/host/nvme-hwmon.c
index 97a84b4..a5af21f 100644
--- a/drivers/nvme/host/nvme-hwmon.c
+++ b/drivers/nvme/host/nvme-hwmon.c
@@ -170,8 +170,12 @@  static umode_t nvme_hwmon_is_visible(const void *_data,
 	case hwmon_temp_max:
 	case hwmon_temp_min:
 		if ((!channel && data->ctrl->wctemp) ||
-		    (channel && data->log.temp_sensor[channel - 1]))
+		    (channel && data->log.temp_sensor[channel - 1])) {
+			if (data->ctrl->quirks &
+			    NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
+				return 0444;
 			return 0644;
+		}
 		break;
 	case hwmon_temp_alarm:
 		if (!channel)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 000a3d9..19e5e87 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -115,6 +115,11 @@  enum nvme_quirks {
 	 * Prevent tag overlap between queues
 	 */
 	NVME_QUIRK_SHARED_TAGS                  = (1 << 13),
+
+	/*
+	 * Don't change the value of the temperature threshold feature
+	 */
+	NVME_QUIRK_NO_TEMP_THRESH_CHANGE	= (1 << 14),
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 931d4a9..2c0206b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3529,7 +3529,8 @@  static const struct pci_device_id nvme_id_table[] = {
 				NVME_QUIRK_DEALLOCATE_ZEROES, },
 	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
-				NVME_QUIRK_MEDIUM_PRIO_SQ },
+				NVME_QUIRK_MEDIUM_PRIO_SQ |
+				NVME_QUIRK_NO_TEMP_THRESH_CHANGE },
 	{ PCI_VDEVICE(INTEL, 0xf1a6),	/* Intel 760p/Pro 7600p */
 		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */