diff mbox series

[3/6] hwmon: (k10temp) Check return value of amd_smn_read()

Message ID 20230516202430.4157216-4-yazen.ghannam@amd.com (mailing list archive)
State New, archived
Headers show
Series Enhance AMD SMN Error Checking | expand

Commit Message

Yazen Ghannam May 16, 2023, 8:24 p.m. UTC
Check the return value of amd_smn_read() before saving a value. This
ensures invalid values aren't saved or used.

There are three cases here with slightly different behavior.

1) read_tempreg_nb_zen():
	This is a function pointer which does not include a return code.
	In this case, set the register value to 0 on failure. This
	enforces Read-as-Zero behavior.

2) k10temp_read_temp():
	This function does have return codes, so return -EINVAL on a
	failed register read. Continued operation is not necessary,
	since there is no valid data from the register. Furthermore, if
	the register value was set to 0, then the following operation
	would underflow.

3) k10temp_get_ccd_support():
	This function reads the same register from multiple CCD
	instances in a loop. A bitmask is built with one bit for each
	instance whose register has a specific bit set. The loop should
	continue on a failed register read, skipping the bit check.

Furthermore, the __must_check attribute will be added to amd_smn_read().
Therefore, this change is required to avoid compile-time warnings.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/hwmon/k10temp.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

Comments

Guenter Roeck May 17, 2023, 12:25 p.m. UTC | #1
On Tue, May 16, 2023 at 03:24:27PM -0500, Yazen Ghannam wrote:
> Check the return value of amd_smn_read() before saving a value. This
> ensures invalid values aren't saved or used.
> 
> There are three cases here with slightly different behavior.
> 
> 1) read_tempreg_nb_zen():
> 	This is a function pointer which does not include a return code.
> 	In this case, set the register value to 0 on failure. This
> 	enforces Read-as-Zero behavior.
> 
> 2) k10temp_read_temp():
> 	This function does have return codes, so return -EINVAL on a
> 	failed register read. Continued operation is not necessary,
> 	since there is no valid data from the register. Furthermore, if
> 	the register value was set to 0, then the following operation
> 	would underflow.
> 
> 3) k10temp_get_ccd_support():
> 	This function reads the same register from multiple CCD
> 	instances in a loop. And a bitmask is formed if a specific bit
> 	is set in each register instance. The loop should continue on a
> 	failed register read, skipping the bit check.
> 
> Furthermore, the __must_check attribute will be added to amd_smn_read().
> Therefore, this change is required to avoid compile-time warnings.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> Cc: stable@vger.kernel.org
> ---
>  drivers/hwmon/k10temp.c | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
> index 7b177b9fbb09..6ea1fa62b7c1 100644
> --- a/drivers/hwmon/k10temp.c
> +++ b/drivers/hwmon/k10temp.c
> @@ -145,8 +145,9 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
>  
>  static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval)
>  {
> -	amd_smn_read(amd_pci_dev_to_node_id(pdev),
> -		     ZEN_REPORTED_TEMP_CTRL_BASE, regval);
> +	if (amd_smn_read(amd_pci_dev_to_node_id(pdev),
> +			 ZEN_REPORTED_TEMP_CTRL_BASE, regval))
> +		*regval = 0;
>  }
>  
>  static long get_raw_temp(struct k10temp_data *data)
> @@ -213,9 +214,11 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
>  				*val = 0;
>  			break;
>  		case 2 ... 13:		/* Tccd{1-12} */
> -			amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
> -				     ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
> -						  &regval);
> +			if (amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
> +					 ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
> +					 &regval))
> +				return -EINVAL;
> +

-EINVAL: Invalid Argument, supposed to be used for bad user input.
I don't see how that would apply here. amd_smn_read() returns
a valid error code. This error code should be returned to the caller,
or there needs to be an explanation why this is not appropriate.

>  			*val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000;
>  			break;
>  		default:
> @@ -373,8 +376,10 @@ static void k10temp_get_ccd_support(struct pci_dev *pdev,
>  	int i;
>  
>  	for (i = 0; i < limit; i++) {
> -		amd_smn_read(amd_pci_dev_to_node_id(pdev),
> -			     ZEN_CCD_TEMP(data->ccd_offset, i), &regval);
> +		if (amd_smn_read(amd_pci_dev_to_node_id(pdev),
> +				 ZEN_CCD_TEMP(data->ccd_offset, i), &regval))
> +			continue;
> +
The reason for ignoring the error should be explained here.

>  		if (regval & ZEN_CCD_TEMP_VALID)
>  			data->show_temp |= BIT(TCCD_BIT(i));
>  	}
> -- 
> 2.34.1
>
Yazen Ghannam May 17, 2023, 2:04 p.m. UTC | #2
On 5/17/23 8:25 AM, Guenter Roeck wrote:

[...]

>>  static long get_raw_temp(struct k10temp_data *data)
>> @@ -213,9 +214,11 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
>>  				*val = 0;
>>  			break;
>>  		case 2 ... 13:		/* Tccd{1-12} */
>> -			amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
>> -				     ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
>> -						  &regval);
>> +			if (amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
>> +					 ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
>> +					 &regval))
>> +				return -EINVAL;
>> +
> 
> -EINVAL: Invalid Argument, supposed to be used for bad user input.
> I don't see how that would apply here. amd_smn_read() returns
> a valid error code. This error code should be returned to the caller,
> or there needs to be an explanation why this is not appropriate.
>

Understood. Will change it to return the amd_smn_read() error code.

>>  			*val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000;
>>  			break;
>>  		default:
>> @@ -373,8 +376,10 @@ static void k10temp_get_ccd_support(struct pci_dev *pdev,
>>  	int i;
>>  
>>  	for (i = 0; i < limit; i++) {
>> -		amd_smn_read(amd_pci_dev_to_node_id(pdev),
>> -			     ZEN_CCD_TEMP(data->ccd_offset, i), &regval);
>> +		if (amd_smn_read(amd_pci_dev_to_node_id(pdev),
>> +				 ZEN_CCD_TEMP(data->ccd_offset, i), &regval))
>> +			continue;
>> +
> The reason for ignoring the error should be explained here.
>

Sure thing. I'll add a code comment above.

Thanks,
Yazen
diff mbox series

Patch

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 7b177b9fbb09..6ea1fa62b7c1 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -145,8 +145,9 @@  static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
 
 static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval)
 {
-	amd_smn_read(amd_pci_dev_to_node_id(pdev),
-		     ZEN_REPORTED_TEMP_CTRL_BASE, regval);
+	if (amd_smn_read(amd_pci_dev_to_node_id(pdev),
+			 ZEN_REPORTED_TEMP_CTRL_BASE, regval))
+		*regval = 0;
 }
 
 static long get_raw_temp(struct k10temp_data *data)
@@ -213,9 +214,11 @@  static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
 				*val = 0;
 			break;
 		case 2 ... 13:		/* Tccd{1-12} */
-			amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
-				     ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
-						  &regval);
+			if (amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
+					 ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
+					 &regval))
+				return -EINVAL;
+
 			*val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000;
 			break;
 		default:
@@ -373,8 +376,10 @@  static void k10temp_get_ccd_support(struct pci_dev *pdev,
 	int i;
 
 	for (i = 0; i < limit; i++) {
-		amd_smn_read(amd_pci_dev_to_node_id(pdev),
-			     ZEN_CCD_TEMP(data->ccd_offset, i), &regval);
+		if (amd_smn_read(amd_pci_dev_to_node_id(pdev),
+				 ZEN_CCD_TEMP(data->ccd_offset, i), &regval))
+			continue;
+
 		if (regval & ZEN_CCD_TEMP_VALID)
 			data->show_temp |= BIT(TCCD_BIT(i));
 	}