diff mbox series

[v2,3/4] hwmon (it87): Test for chipset before entering configuration mode

Message ID 20240427083317.2077175-4-frank@crawford.emu.id.au (mailing list archive)
State Superseded
Headers show
Series hwmon (it87): Correct handling for configuration mode | expand

Commit Message

Frank Crawford April 27, 2024, 8:33 a.m. UTC
Major part of the change for the new method to avoid chipset issues.

The actual update does the following:

1) Lock the memory, but do not perform an SIO entry (previously an SIO
entry would have been performed at this point).

2) Attempt to read the chipID.  This should be safe no matter which
chip we have.

3) If step (2) fails, then perform an SIO entry and retry the chipID read.
For older chips, and on failure, this behaves as it did before this patch.

4) Set the sio_data->type, similar to previously.

5) If we have not performed an SIO entry, and this is not a chip type
with the NOCONF feature, then it will perform an SIO entry at this
point.

6) Proceed with setup as prior to this patch.

7) Any following access to the SIO registers will invoke the SIO entry
and SIO exit steps unless it is a chip with the NOCONF feature set.
This was set up in the previous patches in this patchset.

8) There is also a minor update to the failure exit path, which now depends
on whether an SIO entry was performed, in addition to the previous tests.

Signed-off-by: Frank Crawford <frank@crawford.emu.id.au>
---
 drivers/hwmon/it87.c | 52 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 5 deletions(-)

Comments

Guenter Roeck April 27, 2024, 11:11 a.m. UTC | #1
On 4/27/24 01:33, Frank Crawford wrote:
> Major part of the change for the new method to avoid chipset issues.
> 
> The actual update does the following:
> 
> 1) Lock the memory, but does not perform a SIO entry (previously it
> would have performed an SIO entry).
> 
> 2) Attempt to read the chipID.  This should be safe no matter which
> chip we have.
> 
> 3) If step (2) fails, then perform SIO entry and retry chipID read.  For
> older chips and on failure it acts similarly to prior to this patch.
> 
> 4) Set the sio_data->type, similar to previously.
> 
> 5) If we have not performed an SIO entry, and this is not a chip type
> with the NOCONF feature, then it will perform an SIO entry at this
> point.
> 
> 6) Proceed with setup as prior to this patch.
> 
> 7) Any following access to the SIO registers will invoke the SIO entry
> and SIO exit steps unless it is a chip with the NOCONF feature set.
> This was set up in the previous patches in this patchset.
> 
> 8) There is also some minor update to the failure exit based on if it
> had performed a SIO entry or not, in addition to the previous tests.
> 
> Signed-off-by: Frank Crawford <frank@crawford.emu.id.au>
> ---
>   drivers/hwmon/it87.c | 52 +++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
> index 396c2d3afbf7..6a77f2f6e1e1 100644
> --- a/drivers/hwmon/it87.c
> +++ b/drivers/hwmon/it87.c
> @@ -2667,6 +2667,27 @@ static const struct attribute_group it87_group_auto_pwm = {
>   	.is_visible = it87_auto_pwm_is_visible,
>   };
>   
> +/*
> + * Original explanation:
> + * On various Gigabyte AM4 boards (AB350, AX370), the second Super-IO chip
> + * (IT8792E) needs to be in configuration mode before accessing the first
> + * due to a bug in IT8792E which otherwise results in LPC bus access errors.
> + * This needs to be done before accessing the first Super-IO chip since
> + * the second chip may have been accessed prior to loading this driver.
> + *
> + * The problem is also reported to affect IT8795E, which is used on X299 boards
> + * and has the same chip ID as IT8792E (0x8733). It also appears to affect
> + * systems with IT8790E, which is used on some Z97X-Gaming boards as well as
> + * Z87X-OC.
> + *
> + * From other information supplied:
> + * ChipIDs 0x8733, 0x8695 (early ID for IT87952E) and 0x8790 are initialised
> + * and left in configuration mode, and entering and/or exiting configuration
> + * mode is what causes the crash.
> + *
> + * The recommendation is to look up the chipID before doing any mode swap
> + * and then act accordingly.
> + */
>   /* SuperIO detection - will change isa_address if a chip is found */
>   static int __init it87_find(int sioaddr, unsigned short *address,
>   			    struct it87_sio_data *sio_data, int chip_cnt)
> @@ -2674,16 +2695,25 @@ static int __init it87_find(int sioaddr, unsigned short *address,
>   	int err;
>   	u16 chip_type;
>   	const struct it87_devices *config = NULL;
> +	bool opened = false;
>   
> -	err = superio_enter(sioaddr, false);
> +	/* First step, lock memory but don't enter configuration mode */
> +	err = superio_enter(sioaddr, true);
>   	if (err)
>   		return err;
>   
>   	err = -ENODEV;
>   	chip_type = superio_inw(sioaddr, DEVID);
> -	/* check first for a valid chip before forcing chip id */
> -	if (chip_type == 0xffff)
> -		goto exit;
> +	/* Check for a valid chip before forcing chip id */
> +	if (chip_type == 0xffff) {
> +		/* Enter configuration mode */
> +		__superio_enter(sioaddr);
> +		opened = true;
> +		/* and then try again */
> +		chip_type = superio_inw(sioaddr, DEVID);
> +		if (chip_type == 0xffff)
> +			goto exit;
> +	}
>   
>   	if (force_id_cnt == 1) {
>   		/* If only one value given use for all chips */
> @@ -2767,6 +2797,18 @@ static int __init it87_find(int sioaddr, unsigned short *address,
>   
>   	config = &it87_devices[sio_data->type];
>   
> +	/*
> +	 * If previously we didn't enter configuration mode and it isn't a
> +	 * chip we know is initialised in configuration mode, then enter
> +	 * configuration mode.
> +	 *
> +	 * I don't know if any such chips can exist but be defensive.
> +	 */
> +	if (!opened && !has_noconf(config)) {
> +		__superio_enter(sioaddr);
> +		opened = true;
> +	}
> +
>   	superio_select(sioaddr, PME);
>   	if (!(superio_inb(sioaddr, IT87_ACT_REG) & 0x01)) {
>   		pr_info("Device (chip %s ioreg 0x%x) not activated, skipping\n",
> @@ -3144,7 +3186,7 @@ static int __init it87_find(int sioaddr, unsigned short *address,
>   	}
>   
>   exit:
> -	superio_exit(sioaddr, config ? has_noconf(config) : false);
> +	superio_exit(sioaddr, opened && config && has_noconf(config));

If 'opened' is false, this could be an affected chip. Are you sure
that it makes sense to pass 'false' as parameter here in that case ?
Doesn't that mean that the chip might be one of the affected chips,
but the superio exit sequence would be executed anyway ?
Am I missing something ?

Guenter
Frank Crawford April 27, 2024, 11:53 a.m. UTC | #2
On Sat, 2024-04-27 at 04:11 -0700, Guenter Roeck wrote:
> On 4/27/24 01:33, Frank Crawford wrote:
...
> > @@ -3144,7 +3186,7 @@ static int __init it87_find(int sioaddr,
> > unsigned short *address,
> >    }
> >   
> >   exit:
> > - superio_exit(sioaddr, config ? has_noconf(config) : false);
> > + superio_exit(sioaddr, opened && config && has_noconf(config));
> 
> If 'opened' is false, this could be an affected chip. Are you sure
> that it makes sense to pass 'false' as parameter here in that case ?
> Doesn't that mean that the chip might be one of the affected chips,
> but the superio exit sequence would be executed anyway ?
> Am I missing something ?

Ohh, you may be right, I think I have got myself confused here with
opened and how it is used in superio_exit.

I think it should be !opened, but I will just check I still haven't
messed it up.

> 
> Guenter

Thanks
Frank
Guenter Roeck April 27, 2024, 5 p.m. UTC | #3
On 4/27/24 04:53, Frank Crawford wrote:
> On Sat, 2024-04-27 at 04:11 -0700, Guenter Roeck wrote:
>> On 4/27/24 01:33, Frank Crawford wrote:
> ...
>>> @@ -3144,7 +3186,7 @@ static int __init it87_find(int sioaddr,
>>> unsigned short *address,
>>>     }
>>>    
>>>    exit:
>>> - superio_exit(sioaddr, config ? has_noconf(config) : false);
>>> + superio_exit(sioaddr, opened && config && has_noconf(config));
>>
>> If 'opened' is false, this could be an affected chip. Are you sure
>> that it makes sense to pass 'false' as parameter here in that case ?
>> Doesn't that mean that the chip might be one of the affected chips,
>> but the superio exit sequence would be executed anyway ?
>> Am I missing something ?
> 
> Ohh, you may be right, I think I have got myself confused here with
> opened and how it is used in superio_exit.
> 

It took me a while to understand as well. The double negation of the
'noentry' parameter makes it difficult to understand.

> I think it should be !opened, but I will just check I still haven't
> messed it up.
> 

Maybe it should be something like
	!config && !opened ||		/* no or unknown chip, not enabled */
	config && has_noconf(config)	/* chip known to be affected */

which would translate "don't disable configuration mode for affected chips
and for unknown chips if configuration mode was not enabled".

Btw, I think "enabled" might be a better variable name.

Thanks,
Guenter
Frank Crawford April 28, 2024, 4:31 a.m. UTC | #4
On Sat, 2024-04-27 at 10:00 -0700, Guenter Roeck wrote:
> On 4/27/24 04:53, Frank Crawford wrote:
> > On Sat, 2024-04-27 at 04:11 -0700, Guenter Roeck wrote:
> > > On 4/27/24 01:33, Frank Crawford wrote:
> > ...
> > > > @@ -3144,7 +3186,7 @@ static int __init it87_find(int sioaddr,
> > > > unsigned short *address,
> > > >     }
> > > >    
> > > >    exit:
> > > > - superio_exit(sioaddr, config ? has_noconf(config) : false);
> > > > + superio_exit(sioaddr, opened && config &&
> > > > has_noconf(config));
> > > 
> > > If 'opened' is false, this could be an affected chip. Are you
> > > sure
> > > that it makes sense to pass 'false' as parameter here in that
> > > case ?
> > > Doesn't that mean that the chip might be one of the affected
> > > chips,
> > > but the superio exit sequence would be executed anyway ?
> > > Am I missing something ?
> > 
> > Ohh, you may be right, I think I have got myself confused here with
> > opened and how it is used in superio_exit.
> > 
> 
> It took me a while to understand as well. The double negation of the
> 'noentry' parameter makes it difficult to understand.
> 
> > I think it should be !opened, but I will just check I still haven't
> > messed it up.
> > 
> 
> Maybe it should be something like
>  !config && !opened || /* no or unknown chip, not enabled */
>  config && has_noconf(config) /* chip known to be affected */
> 
> which would translate "don't disable configuration mode for affected
> chips
> and for unknown chips if configuration mode was not enabled".

In fact, I think in this case that entire expression can be simplified
to just "!opened", as we only want to perform the exit code when the
entry code was given, and it is only set to true in those cases.

The only points where there is some "concern" is where it is an unknown
chipID, in which case we should not send the exit unless we previously
needed it to get a chipID (so probably a valid chip we don't know
about), or in the case of an error, which almost certainly means that
sending the code will have no effect, and is actually the same as
previously anyway.

> 
> Btw, I think "enabled" might be a better variable name.

Yes, it probably is.  I'll rename the variable.

I'll send a new version of this patch with those updates.
> 
> Thanks,
> Guenter

Regards
Frank
Guenter Roeck April 28, 2024, 7:03 a.m. UTC | #5
On 4/27/24 21:31, Frank Crawford wrote:
> 
> On Sat, 2024-04-27 at 10:00 -0700, Guenter Roeck wrote:
>> On 4/27/24 04:53, Frank Crawford wrote:
>>> On Sat, 2024-04-27 at 04:11 -0700, Guenter Roeck wrote:
>>>> On 4/27/24 01:33, Frank Crawford wrote:
>>> ...
>>>>> @@ -3144,7 +3186,7 @@ static int __init it87_find(int sioaddr,
>>>>> unsigned short *address,
>>>>>      }
>>>>>     
>>>>>     exit:
>>>>> - superio_exit(sioaddr, config ? has_noconf(config) : false);
>>>>> + superio_exit(sioaddr, opened && config &&
>>>>> has_noconf(config));
>>>>
>>>> If 'opened' is false, this could be an affected chip. Are you
>>>> sure
>>>> that it makes sense to pass 'false' as parameter here in that
>>>> case ?
>>>> Doesn't that mean that the chip might be one of the affected
>>>> chips,
>>>> but the superio exit sequence would be executed anyway ?
>>>> Am I missing something ?
>>>
>>> Ohh, you may be right, I think I have got myself confused here with
>>> opened and how it is used in superio_exit.
>>>
>>
>> It took me a while to understand as well. The double negation of the
>> 'noentry' parameter makes it difficult to understand.
>>
>>> I think it should be !opened, but I will just check I still haven't
>>> messed it up.
>>>
>>
>> Maybe it should be something like
>>   !config && !opened || /* no or unknown chip, not enabled */
>>   config && has_noconf(config) /* chip known to be affected */
>>
>> which would translate "don't disable configuration mode for affected
>> chips
>> and for unknown chips if configuration mode was not enabled".
> 
> In fact, I think in this case that entire expression can be simplified
> to just "!opened", as we only want to perform the exit code when the
> entry code was given, and it is only set to true in those cases.
> 

Not really, because it is at least theoretically possible that it was
necessary to enter configuration mode to determine that the chip
is one of the broken ones. I'll leave that up to you to make the call,
though.

Guenter
Frank Crawford April 28, 2024, 7:35 a.m. UTC | #6
On Sun, 2024-04-28 at 00:03 -0700, Guenter Roeck wrote:
> On 4/27/24 21:31, Frank Crawford wrote:
...
> > In fact, I think in this case that entire expression can be
> > simplified
> > to just "!opened", as we only want to perform the exit code when
> > the
> > entry code was given, and it is only set to true in those cases.
> > 
> 
> Not really, because it is at least theoretically possible that it was
> necessary to enter configuration mode to determine that the chip
> is one of the broken ones. I'll leave that up to you to make the
> call,
> though.

I guess it is theoretically possible, if some other driver has done
something, but as well I guess this patch is different in that it
doesn't do the second chip first, and so is still different.

However, in the only chipset we have been able to reliably lock up
lately we haven't been able to read the chipID even after trying to
enter configuration mode, when we have locked up the chip, so I don't
think this issue will actually happen in practice.

I will still try to get hold of the configuration documentation, to see
any additional notes from the vendor.  I don't expect that soon though.
> 
> Guenter

Regards
Frank
diff mbox series

Patch

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 396c2d3afbf7..6a77f2f6e1e1 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -2667,6 +2667,27 @@  static const struct attribute_group it87_group_auto_pwm = {
 	.is_visible = it87_auto_pwm_is_visible,
 };
 
+/*
+ * Original explanation:
+ * On various Gigabyte AM4 boards (AB350, AX370), the second Super-IO chip
+ * (IT8792E) needs to be in configuration mode before accessing the first
+ * due to a bug in IT8792E which otherwise results in LPC bus access errors.
+ * This needs to be done before accessing the first Super-IO chip since
+ * the second chip may have been accessed prior to loading this driver.
+ *
+ * The problem is also reported to affect IT8795E, which is used on X299 boards
+ * and has the same chip ID as IT8792E (0x8733). It also appears to affect
+ * systems with IT8790E, which is used on some Z97X-Gaming boards as well as
+ * Z87X-OC.
+ *
+ * From other information supplied:
+ * ChipIDs 0x8733, 0x8695 (early ID for IT87952E) and 0x8790 are initialised
+ * and left in configuration mode, and entering and/or exiting configuration
+ * mode is what causes the crash.
+ *
+ * The recommendation is to look up the chipID before doing any mode swap
+ * and then act accordingly.
+ */
 /* SuperIO detection - will change isa_address if a chip is found */
 static int __init it87_find(int sioaddr, unsigned short *address,
 			    struct it87_sio_data *sio_data, int chip_cnt)
@@ -2674,16 +2695,25 @@  static int __init it87_find(int sioaddr, unsigned short *address,
 	int err;
 	u16 chip_type;
 	const struct it87_devices *config = NULL;
+	bool opened = false;
 
-	err = superio_enter(sioaddr, false);
+	/* First step, lock memory but don't enter configuration mode */
+	err = superio_enter(sioaddr, true);
 	if (err)
 		return err;
 
 	err = -ENODEV;
 	chip_type = superio_inw(sioaddr, DEVID);
-	/* check first for a valid chip before forcing chip id */
-	if (chip_type == 0xffff)
-		goto exit;
+	/* Check for a valid chip before forcing chip id */
+	if (chip_type == 0xffff) {
+		/* Enter configuration mode */
+		__superio_enter(sioaddr);
+		opened = true;
+		/* and then try again */
+		chip_type = superio_inw(sioaddr, DEVID);
+		if (chip_type == 0xffff)
+			goto exit;
+	}
 
 	if (force_id_cnt == 1) {
 		/* If only one value given use for all chips */
@@ -2767,6 +2797,18 @@  static int __init it87_find(int sioaddr, unsigned short *address,
 
 	config = &it87_devices[sio_data->type];
 
+	/*
+	 * If previously we didn't enter configuration mode and it isn't a
+	 * chip we know is initialised in configuration mode, then enter
+	 * configuration mode.
+	 *
+	 * I don't know if any such chips can exist but be defensive.
+	 */
+	if (!opened && !has_noconf(config)) {
+		__superio_enter(sioaddr);
+		opened = true;
+	}
+
 	superio_select(sioaddr, PME);
 	if (!(superio_inb(sioaddr, IT87_ACT_REG) & 0x01)) {
 		pr_info("Device (chip %s ioreg 0x%x) not activated, skipping\n",
@@ -3144,7 +3186,7 @@  static int __init it87_find(int sioaddr, unsigned short *address,
 	}
 
 exit:
-	superio_exit(sioaddr, config ? has_noconf(config) : false);
+	superio_exit(sioaddr, opened && config && has_noconf(config));
 	return err;
 }