diff mbox

ACPI: add "auto" to acpi_enforce_resources

Message ID 20090125210520.GA12963@dreamland.darkstar.lan (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Luca Tettamanti Jan. 25, 2009, 9:05 p.m. UTC
(This is an improved version of my earlier patch, I've reworked deviced
detection to simply check for the desired HID)

The following patch adds "auto" option to "acpi_enforce_resource"; this value
is also the new default.
"auto" enforces strict resource checking - disallowing access by native drivers
to IO ports and memory regions claimed by ACPI firmware - when a device with a
known ACPI driver is found (currently only ASUS ATK0110 is checked), and
reverts to lax checking otherwise.

The patch is mainly aimed to block native hwmon drivers from touching
monitoring chips that ACPI thinks it own.

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
---
New revision, now it simply checks the HID.

 Documentation/kernel-parameters.txt |   19 ++++++++++++++++
 drivers/acpi/osl.c                  |   41 +++++++++++++++++++++++++++++++++---
 2 files changed, 57 insertions(+), 3 deletions(-)



Luca

Comments

Hans de Goede Jan. 26, 2009, 8:37 a.m. UTC | #1
Luca Tettamanti wrote:
> (This is an improved version of my earlier patch, I've reworked deviced
> detection to simply check for the desired HID)
> 
> The following patch adds "auto" option to "acpi_enforce_resource"; this value
> is also the new default.
> "auto" enforces strict resource checking - disallowing access by native drivers
> to IO ports and memory regions claimed by ACPI firmware - when a device with a
> known ACPI driver is found (currently only ASUS ATK0110 is checked), and
> reverts to lax checking otherwise.
> 
> The patch is mainly aimed to block native hwmon drivers from touching
> monitoring chips that ACPI thinks it own.
> 
> Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

This looks very good!

ACPI team, any chance we can get this in the mainline?

We want to add a driver (for the asus atk0110 ACPI interface) to the hwmon 
subsys which talks to hwmon IC's through ACPI, but before we can do that we 
must make sure that potentially conflicting native IC drivers do not load.

Thanks & Regards,

Hans

> ---
> New revision, now it simply checks the HID.
> 
>  Documentation/kernel-parameters.txt |   19 ++++++++++++++++
>  drivers/acpi/osl.c                  |   41 +++++++++++++++++++++++++++++++++---
>  2 files changed, 57 insertions(+), 3 deletions(-)
> 
> Index: b/Documentation/kernel-parameters.txt
> ===================================================================
> --- a/Documentation/kernel-parameters.txt	2009-01-17 21:22:49.218882286 +0100
> +++ b/Documentation/kernel-parameters.txt	2009-01-21 23:25:41.262478018 +0100
> @@ -258,6 +258,25 @@
>  			to assume that this machine's pmtimer latches its value
>  			and always returns good values.
>  
> +	acpi_enforce_resources=	[ACPI]
> +			{ strict, auto, lax, no }
> +			Check for resource conflicts between native drivers
> +			and ACPI OperationRegions (SystemIO and System Memory
> +			only). IO ports and memory declared in ACPI might be
> +			used by the ACPI subsystem in arbitrary AML code and
> +			can interfere with legacy drivers.
> +			strict: access to resources claimed by ACPI is denied;
> +			legacy drivers trying to access reserved resources
> +			will fail to load.
> +			auto (default): try and detect ACPI devices with known
> +			ACPI drivers; escalates the setting to "strict" if such
> +			a device is found, otherwise behaves like "lax".
> +			lax: access to resources claimed by ACPI is allowed;
> +			legacy drivers trying to access reserved resources
> +			will load and a warning message is logged.
> +			no: ACPI OperationRegions are not marked as reserved,
> +			no further checks are performed.
> +
>  	agp=		[AGP]
>  			{ off | try_unsupported }
>  			off: disable AGP support
> Index: b/drivers/acpi/osl.c
> ===================================================================
> --- a/drivers/acpi/osl.c	2009-01-17 21:22:49.190882303 +0100
> +++ b/drivers/acpi/osl.c	2009-01-25 22:04:30.759209000 +0100
> @@ -1063,7 +1063,10 @@
>   * in arbitrary AML code and can interfere with legacy drivers.
>   * acpi_enforce_resources= can be set to:
>   *
> - *   - strict           (2)
> + *   - auto             (2)
> + *     -> detect possible conflicts with ACPI drivers and switch to
> + *     strict if needed, otherwise act like lax
> + *   - strict           (3)
>   *     -> further driver trying to access the resources will not load
>   *   - lax (default)    (1)
>   *     -> further driver trying to access the resources will load, but you
> @@ -1073,11 +1076,12 @@
>   *     -> ACPI Operation Region resources will not be registered
>   *
>   */
> -#define ENFORCE_RESOURCES_STRICT 2
> +#define ENFORCE_RESOURCES_STRICT 3
> +#define ENFORCE_RESOURCES_AUTO   2
>  #define ENFORCE_RESOURCES_LAX    1
>  #define ENFORCE_RESOURCES_NO     0
>  
> -static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
> +static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_AUTO;
>  
>  static int __init acpi_enforce_resources_setup(char *str)
>  {
> @@ -1086,6 +1090,8 @@
>  
>  	if (!strcmp("strict", str))
>  		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
> +	else if (!strcmp("auto", str))
> +		acpi_enforce_resources = ENFORCE_RESOURCES_AUTO;
>  	else if (!strcmp("lax", str))
>  		acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
>  	else if (!strcmp("no", str))
> @@ -1096,6 +1102,35 @@
>  
>  __setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
>  
> +static acpi_status acpi_detect_asus_atk_cb(acpi_handle obj_handle,
> +		u32 nesting_level, void *context, void **return_value)
> +{
> +	*((bool *)return_value) = true;
> +	return AE_CTRL_TERMINATE;
> +}
> +
> +static int __init acpi_detect_asus_atk(void)
> +{
> +	acpi_status ret;
> +	bool found = false;
> +
> +	if (acpi_enforce_resources != ENFORCE_RESOURCES_AUTO)
> +		return 0;
> +
> +	ret = acpi_get_devices("ATK0110", acpi_detect_asus_atk_cb,
> +			NULL, (void **)&found);
> +
> +	if (ret == AE_OK && found) {
> +		printk(KERN_DEBUG "Asus ATK0110 interface detected: "
> +				"enforcing resource checking.\n");
> +		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
> +	} else
> +		acpi_enforce_resources =  ENFORCE_RESOURCES_LAX;
> +
> +	return 0;
> +}
> +fs_initcall(acpi_detect_asus_atk);
> +
>  /* Check for resource conflicts between ACPI OperationRegions and native
>   * drivers */
>  int acpi_check_resource_conflict(struct resource *res)
> 
> 
> Luca
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Renninger Jan. 29, 2009, 10:30 a.m. UTC | #2
On Sunday 25 January 2009 22:05:20 Luca Tettamanti wrote:
> (This is an improved version of my earlier patch, I've reworked deviced
> detection to simply check for the desired HID)
>
> The following patch adds "auto" option to "acpi_enforce_resource"; this
> value is also the new default.
> "auto" enforces strict resource checking - disallowing access by native
> drivers to IO ports and memory regions claimed by ACPI firmware - when a
> device with a known ACPI driver is found (currently only ASUS ATK0110 is
> checked), and reverts to lax checking otherwise.
>
> The patch is mainly aimed to block native hwmon drivers from touching
> monitoring chips that ACPI thinks it own.
Having this in the core ACPI code is ugly.
Either this should be more generic (instead of hardcoded "ATK0110" device,
use a list to add further specific thermal ACPI drivers). Hmm, maybe it's
only ASUS violating the spec? Thinkpads seem to read out extra thermal
data from EC and shouldn't interfere with hwmon drivers. hp-wmi seem to
read hdd temp through wmi, don't know whether this could interfere with
hwmon, I expect not?  Are there other known, model specific ACPI devices, 
accessing thermal IOs directly which could interfere with hwmon, then it
might be worth it.

If not, on these ASUS there should be a specific hwmon driver interfering
with the ATK0110 device?
Can't you just add:

interfering_hwmon_driver.c:

#ifdef ACPI
acpi_search_for_ATK0110(){
    /* put code from below in here */
}
#endif

interfering_hwmon_driver_init/add(){
...
#ifdef ACPI
    if (!acpi_disabled)
      if (acpi_search_for_ATK0110()) {
        printk ("Do not use this hwmon driver, use asus_acpi to read the "
                "extra sensor.\n");
        return -1;
      else
        err = acpi_check_resource_conflict(&res);
    }
#endif

    Thomas

> Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
> ---
> New revision, now it simply checks the HID.
>
>  Documentation/kernel-parameters.txt |   19 ++++++++++++++++
>  drivers/acpi/osl.c                  |   41
> +++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 3
> deletions(-)
>
> Index: b/Documentation/kernel-parameters.txt
> ===================================================================
> --- a/Documentation/kernel-parameters.txt	2009-01-17 21:22:49.218882286
> +0100 +++ b/Documentation/kernel-parameters.txt	2009-01-21
> 23:25:41.262478018 +0100 @@ -258,6 +258,25 @@
>  			to assume that this machine's pmtimer latches its value
>  			and always returns good values.
>
> +	acpi_enforce_resources=	[ACPI]
> +			{ strict, auto, lax, no }
> +			Check for resource conflicts between native drivers
> +			and ACPI OperationRegions (SystemIO and System Memory
> +			only). IO ports and memory declared in ACPI might be
> +			used by the ACPI subsystem in arbitrary AML code and
> +			can interfere with legacy drivers.
> +			strict: access to resources claimed by ACPI is denied;
> +			legacy drivers trying to access reserved resources
> +			will fail to load.
> +			auto (default): try and detect ACPI devices with known
> +			ACPI drivers; escalates the setting to "strict" if such
> +			a device is found, otherwise behaves like "lax".
> +			lax: access to resources claimed by ACPI is allowed;
> +			legacy drivers trying to access reserved resources
> +			will load and a warning message is logged.
> +			no: ACPI OperationRegions are not marked as reserved,
> +			no further checks are performed.
> +
>  	agp=		[AGP]
>  			{ off | try_unsupported }
>  			off: disable AGP support
> Index: b/drivers/acpi/osl.c
> ===================================================================
> --- a/drivers/acpi/osl.c	2009-01-17 21:22:49.190882303 +0100
> +++ b/drivers/acpi/osl.c	2009-01-25 22:04:30.759209000 +0100
> @@ -1063,7 +1063,10 @@
>   * in arbitrary AML code and can interfere with legacy drivers.
>   * acpi_enforce_resources= can be set to:
>   *
> - *   - strict           (2)
> + *   - auto             (2)
> + *     -> detect possible conflicts with ACPI drivers and switch to
> + *     strict if needed, otherwise act like lax
> + *   - strict           (3)
>   *     -> further driver trying to access the resources will not load
>   *   - lax (default)    (1)
>   *     -> further driver trying to access the resources will load, but you
> @@ -1073,11 +1076,12 @@
>   *     -> ACPI Operation Region resources will not be registered
>   *
>   */
> -#define ENFORCE_RESOURCES_STRICT 2
> +#define ENFORCE_RESOURCES_STRICT 3
> +#define ENFORCE_RESOURCES_AUTO   2
>  #define ENFORCE_RESOURCES_LAX    1
>  #define ENFORCE_RESOURCES_NO     0
>
> -static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
> +static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_AUTO;
>
>  static int __init acpi_enforce_resources_setup(char *str)
>  {
> @@ -1086,6 +1090,8 @@
>
>  	if (!strcmp("strict", str))
>  		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
> +	else if (!strcmp("auto", str))
> +		acpi_enforce_resources = ENFORCE_RESOURCES_AUTO;
>  	else if (!strcmp("lax", str))
>  		acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
>  	else if (!strcmp("no", str))
> @@ -1096,6 +1102,35 @@
>
>  __setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
>
> +static acpi_status acpi_detect_asus_atk_cb(acpi_handle obj_handle,
> +		u32 nesting_level, void *context, void **return_value)
> +{
> +	*((bool *)return_value) = true;
> +	return AE_CTRL_TERMINATE;
> +}
> +
> +static int __init acpi_detect_asus_atk(void)
> +{
> +	acpi_status ret;
> +	bool found = false;
> +
> +	if (acpi_enforce_resources != ENFORCE_RESOURCES_AUTO)
> +		return 0;
> +
> +	ret = acpi_get_devices("ATK0110", acpi_detect_asus_atk_cb,
> +			NULL, (void **)&found);
> +
> +	if (ret == AE_OK && found) {
> +		printk(KERN_DEBUG "Asus ATK0110 interface detected: "
> +				"enforcing resource checking.\n");
> +		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
> +	} else
> +		acpi_enforce_resources =  ENFORCE_RESOURCES_LAX;
> +
> +	return 0;
> +}
> +fs_initcall(acpi_detect_asus_atk);
> +
>  /* Check for resource conflicts between ACPI OperationRegions and native
>   * drivers */
>  int acpi_check_resource_conflict(struct resource *res)
>
>
> Luca

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Luca Tettamanti Jan. 29, 2009, 3:16 p.m. UTC | #3
On Thu, Jan 29, 2009 at 11:30 AM, Thomas Renninger <trenn@suse.de> wrote:
> On Sunday 25 January 2009 22:05:20 Luca Tettamanti wrote:
>> (This is an improved version of my earlier patch, I've reworked deviced
>> detection to simply check for the desired HID)
>>
>> The following patch adds "auto" option to "acpi_enforce_resource"; this
>> value is also the new default.
>> "auto" enforces strict resource checking - disallowing access by native
>> drivers to IO ports and memory regions claimed by ACPI firmware - when a
>> device with a known ACPI driver is found (currently only ASUS ATK0110 is
>> checked), and reverts to lax checking otherwise.
>>
>> The patch is mainly aimed to block native hwmon drivers from touching
>> monitoring chips that ACPI thinks it own.
>
> Having this in the core ACPI code is ugly.

I think we all agree :-)

> Either this should be more generic (instead of hardcoded "ATK0110" device,
> use a list to add further specific thermal ACPI drivers). Hmm, maybe it's
> only ASUS violating the spec?

ASUS it's not actually violating any spec...

> Thinkpads seem to read out extra thermal
> data from EC and shouldn't interfere with hwmon drivers.

The point is that there is only 1 physical sensor, which both ACPI and
a native driver want to drive; transaction on SMBus to read the sensor
are usually in the form "select bank" + "read" and the sequence is
*not* atomic. In ASUS case the IO ports are correctly reserved by the
firmware, but (historically) this wasn't taken into account.
Aside from ASUS the problem is generic since there are two drivers
poking at the same hardware; for example there are reports of native
drivers interfering with normal TZ polling (see [1]). The EC does not
make any difference, since a native driver would speak directly to the
monitoring chip, effectively by-passing the EC.
Now, in principle "strict" is the correct behaviour for the resource
checking code, but it would break many working setups leaving the
users without any kind of hw monitoring. The "auto" hack is meant to
force the users to migrate to the ACPI driver...

> Are there other known, model specific ACPI devices,
> accessing thermal IOs directly which could interfere with hwmon, then it
> might be worth it.

Right now "auto" is ASUS-specific because *I* know that there any many
different native drivers that works on various boards (and I wrote the
the ACPI driver...); At least eeepc and (some?) thinkpads have ACPI
hwmon capabilities but I don't know whether there are native drivers
available or not (but they could be "blacklisted"  preemptively like
ASUS ATK).


Luca
[1] http://www.lm-sensors.org/ticket/2072 and:
http://thread.gmane.org/gmane.linux.drivers.sensors/12359
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Renninger Jan. 29, 2009, 4:29 p.m. UTC | #4
On Thursday 29 January 2009 16:16:38 Luca Tettamanti wrote:
> On Thu, Jan 29, 2009 at 11:30 AM, Thomas Renninger <trenn@suse.de> wrote:
> > On Sunday 25 January 2009 22:05:20 Luca Tettamanti wrote:
> >> (This is an improved version of my earlier patch, I've reworked deviced
> >> detection to simply check for the desired HID)
> >>
> >> The following patch adds "auto" option to "acpi_enforce_resource"; this
> >> value is also the new default.
> >> "auto" enforces strict resource checking - disallowing access by native
> >> drivers to IO ports and memory regions claimed by ACPI firmware - when a
> >> device with a known ACPI driver is found (currently only ASUS ATK0110 is
> >> checked), and reverts to lax checking otherwise.
> >>
> >> The patch is mainly aimed to block native hwmon drivers from touching
> >> monitoring chips that ACPI thinks it own.
> >
> > Having this in the core ACPI code is ugly.
>
> I think we all agree :-)
>
> > Either this should be more generic (instead of hardcoded "ATK0110"
> > device, use a list to add further specific thermal ACPI drivers). Hmm,
> > maybe it's only ASUS violating the spec?
>
> ASUS it's not actually violating any spec...
They have to export the temperature through a thermal ACPI
device, not through the ASUS specific ATK0110 device. This is
what I mean whether there might be other vendor specific ACPI
devices violating the spec (by providing temperature read
functionality which should be done through the generic thermal
ACPI device).

> > Thinkpads seem to read out extra thermal
> > data from EC and shouldn't interfere with hwmon drivers.
>
> The point is that there is only 1 physical sensor, which both ACPI and
> a native driver want to drive; transaction on SMBus to read the sensor
> are usually in the form "select bank" + "read" and the sequence is
> *not* atomic. In ASUS case the IO ports are correctly reserved by the
> firmware, but (historically) this wasn't taken into account.
I know about this problem.

> Aside from ASUS the problem is generic since there are two drivers
> poking at the same hardware; for example there are reports of native
> drivers interfering with normal TZ polling (see [1])
Yes, this is even worse as the check that you want to add will not
catch it. Looking a bit through ASUS DSDTs this seem to be common
on most of them. I put some example DSDT code of a recent M2A-VM-HDMI
workstation at the end.

> The EC does not
> make any difference, since a native driver would speak directly to the
> monitoring chip, effectively by-passing the EC.
> Now, in principle "strict" is the correct behaviour for the resource
> checking code, but it would break many working setups leaving the
> users without any kind of hw monitoring. The "auto" hack is meant to
> force the users to migrate to the ACPI driver...
>
> > Are there other known, model specific ACPI devices,
> > accessing thermal IOs directly which could interfere with hwmon, then it
> > might be worth it.
>
> Right now "auto" is ASUS-specific because *I* know that there any many
> different native drivers that works on various boards (and I wrote the
> the ACPI driver...);
Hmm, grep -r ATK0110 drivers/platform/x86 in latest ACPI test tree is
empty, can you give me a pointer to a recent version of your driver.

> At least eeepc and (some?) thinkpads have ACPI
> hwmon capabilities but I don't know whether there are native drivers
> available or not (but they could be "blacklisted"  preemptively like
> ASUS ATK).
I expect that ASUS only will interfere with specific hwmon driver(s)?
So why don't you move the check into the hwmon driver and make it not
load on these systems?
It's still ugly, but IMO better than to pollute the ACPI core.

Here some thermal DSDT parts of a recent ASUS machine:

The temperature exported through the generic thermal ACPI device:

    OperationRegion (IP, SystemIO, 0x022D, 0x02)
    Field (IP, ByteAcc, NoLock, Preserve)
    {
        INDX,   8,
        DAT0,   8
    }

    Method (SBYT, 2, NotSerialized)
    {
        Store (Arg0, INDX)
        Store (Arg1, DAT0)
    }

    Method (GBYT, 1, NotSerialized)
    {
        Store (Arg0, INDX)
        Store (DAT0, Local0)
        Return (Local0)
    }

Method (_TMP, 0, NotSerialized)
    {
    /* makes use of RTMP which uses GBYT and SBYT to actually read
       the temp
    */
    }

There is another RTMP function in ATK0110 (do you use that?)
The other RTMP in ATK0110 seem to use this IO area to read out
the temperatures (MBTE and TSR1, for mainboard and CPU temp?):

                OperationRegion (HWRE, SystemIO, 0x022D, 0x02)
                Field (HWRE, ByteAcc, NoLock, Preserve)
                {
                    HIDX,   8,
                    HDAT,   8
                }

                IndexField (HIDX, HDAT, ByteAcc, NoLock, Preserve)
                {
                            Offset (0x0B),
                    FD11,   3,
                    FD12,   3,
                    FD13,   1,
                            Offset (0x0C),
                            Offset (0x0D),
                    FAN1,   8,
                    FAN2,   8,
                            Offset (0x18),
                    FEN1,   8,
                    FEN2,   8,
                            Offset (0x20),
                    VCOR,   8,
                    V33V,   8,
                            Offset (0x23),
                    V50V,   8,
                    V12V,   8,
                            Offset (0x29),
                    TSR1,   8,
                    MBTE,   8,
                            Offset (0x80),
                    FAN3,   8,
                    FEN3,   8
                }

Can the hwmon people identify which hwmon driver is used
for above devices and the ATK0110 check be inserted there.

On the M2A-VM-HDMI machine where DSDT extracts from above are,
running libsensors I get:

Driver `it87' (should be inserted):
  Detects correctly:
  * ISA bus, address 0x228
    Chip `ITE IT8716F Super IO Sensors' (confidence: 9)

Driver `to-be-written' (should be inserted):
  Detects correctly:
  * Chip `AMD K10 thermal sensors' (confidence: 9)

If this is an ASUS only problem with very specific thermal
sensors, better add the quirk at the specific sensor driver.

If this is something general, then maybe you can convince Len
to add something to the ACPI core, but this should be more
generic then.

      Thomas

PS: I just realize that in ATK0110 the same thermal device
(IO port 0x22D) is used as in the generic ACPI device (_TMP).
So it looks like your asus thermal driver will not only interfere
with the hwmon driver, but with the ACPI thermal driver itself.
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans de Goede Jan. 29, 2009, 6:58 p.m. UTC | #5
Thomas Renninger wrote:
>>> Either this should be more generic (instead of hardcoded "ATK0110"
>>> device, use a list to add further specific thermal ACPI drivers). Hmm,
>>> maybe it's only ASUS violating the spec?
>> ASUS it's not actually violating any spec...
> They have to export the temperature through a thermal ACPI
> device, not through the ASUS specific ATK0110 device. This is
> what I mean whether there might be other vendor specific ACPI
> devices violating the spec (by providing temperature read
> functionality which should be done through the generic thermal
> ACPI device).
> 

hwmon IC's monitor far more then just temperatures, ASUS is doing the right 
thing here by providing an ACPI interface to also read voltages and fan speeds, 
so that these can be read in a way that does not interfere with the ACPI code.

And although even their interface does not expose the full functionality of the 
hwmon IC's, it is much much better then what the thermal ACPI code gives us.

>>> Thinkpads seem to read out extra thermal
>>> data from EC and shouldn't interfere with hwmon drivers.
>> The point is that there is only 1 physical sensor, which both ACPI and
>> a native driver want to drive; transaction on SMBus to read the sensor
>> are usually in the form "select bank" + "read" and the sequence is
>> *not* atomic. In ASUS case the IO ports are correctly reserved by the
>> firmware, but (historically) this wasn't taken into account.
> I know about this problem.
> 
>> Aside from ASUS the problem is generic since there are two drivers
>> poking at the same hardware; for example there are reports of native
>> drivers interfering with normal TZ polling (see [1])
> Yes, this is even worse as the check that you want to add will not
> catch it.

Ack, so the proper solution would be to just make the acpi_enforce_resources 
strict by default, but this may cause lack of functionality for some user who 
are currently using native drivers for hwmon features. Which is why I (hwmon 
subsytem contributor and Jean Delvare (i2c and (ex)hwmon subsystem maintainer) 
proposed this auto setting as a compromise. I'm fine with changing the default 
to strict, AFAIK Jean prefers the auto setting.

<snip>

>> At least eeepc and (some?) thinkpads have ACPI
>> hwmon capabilities but I don't know whether there are native drivers
>> available or not (but they could be "blacklisted"  preemptively like
>> ASUS ATK).
> I expect that ASUS only will interfere with specific hwmon driver(s)?

Yes only those used on their motherboards, and given the wide range of 
motherboard ASUS produces and they offer the ATK0110 interface on almost all of 
them, it pretty much comes down to "all hwmon and smbus drivers", although I'm 
sure if we look we can find exceptions.

> So why don't you move the check into the hwmon driver and make it not
> load on these systems?
> It's still ugly, but IMO better than to pollute the ACPI core.
> 

Because we do not want to do this in a zillion places when it should be done in 
only one. The right solution is to make acpi_enforce_resources strict the 
default, the auto setting is meant as a slow migration path to that right 
solution, as doing that right now probably will cause lots of complaints.

Actually, the really right solution would be for the ACPI subsystem to actually 
claim all the resources using the regular resource tracking so that drivers who 
want to check for resource conflicts with ACPI do not have to explicitly call
acpi_check_resource_conflict() (and friends), but instead will get an error 
when trying to claim the resources from the regular resource system. However 
chances are this will break things on quite a lot of systems, still it would be 
the right thing to do in the end IMHO.

<snip>

> If this is an ASUS only problem with very specific thermal
> sensors, better add the quirk at the specific sensor driver.
> 

It is neither ASUS specific, not is it limited to specific hwmon IC's, these 
are not thermal sensors, they are capable of monitoring far more then just 
temperature, which is why the ACPI thermal device interface is insufficient.

> PS: I just realize that in ATK0110 the same thermal device
> (IO port 0x22D) is used as in the generic ACPI device (_TMP).
> So it looks like your asus thermal driver will not only interfere
> with the hwmon driver, but with the ACPI thermal driver itself.

I would expect the ACPI code to take care of any conflicts between the various 
interfaces it offers itself. And if it is unsafe to call multiple ACPI methods 
at the same time, I would expect the ACPI core to fix this.

Also note that the above paragraph actually advocates in favour of making some 
kind of change to acpi_enforce_resources. Proposal, what if we change the auto 
setting to not only mean "strict" when an ATK0110 interface is present, but 
also when the ACPI thermal zone interface is present ?

Thanks & Regards,

Hans

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jean Delvare Jan. 29, 2009, 9:31 p.m. UTC | #6
On Thu, 29 Jan 2009 19:58:00 +0100, Hans de Goede wrote:
> Thomas Renninger wrote:
> >>> Either this should be more generic (instead of hardcoded "ATK0110"
> >>> device, use a list to add further specific thermal ACPI drivers). Hmm,
> >>> maybe it's only ASUS violating the spec?
> >> ASUS it's not actually violating any spec...
> > They have to export the temperature through a thermal ACPI
> > device, not through the ASUS specific ATK0110 device. This is
> > what I mean whether there might be other vendor specific ACPI
> > devices violating the spec (by providing temperature read
> > functionality which should be done through the generic thermal
> > ACPI device).
> > 
> 
> hwmon IC's monitor far more then just temperatures, ASUS is doing the right 
> thing here by providing an ACPI interface to also read voltages and fan speeds, 
> so that these can be read in a way that does not interfere with the ACPI code.
> 
> And although even their interface does not expose the full functionality of the 
> hwmon IC's, it is much much better then what the thermal ACPI code gives us.
> 
> >>> Thinkpads seem to read out extra thermal
> >>> data from EC and shouldn't interfere with hwmon drivers.
> >> The point is that there is only 1 physical sensor, which both ACPI and
> >> a native driver want to drive; transaction on SMBus to read the sensor
> >> are usually in the form "select bank" + "read" and the sequence is
> >> *not* atomic. In ASUS case the IO ports are correctly reserved by the
> >> firmware, but (historically) this wasn't taken into account.
> > I know about this problem.
> > 
> >> Aside from ASUS the problem is generic since there are two drivers
> >> poking at the same hardware; for example there are reports of native
> >> drivers interfering with normal TZ polling (see [1])
> > Yes, this is even worse as the check that you want to add will not
> > catch it.
> 
> Ack, so the proper solution would be to just make the acpi_enforce_resources 
> strict by default, but this may cause lack of functionality for some user who 
> are currently using native drivers for hwmon features. Which is why I (hwmon 
> subsytem contributor and Jean Delvare (i2c and (ex)hwmon subsystem maintainer) 
> proposed this auto setting as a compromise. I'm fine with changing the default 
> to strict, AFAIK Jean prefers the auto setting.
> 
> <snip>
> 
> >> At least eeepc and (some?) thinkpads have ACPI
> >> hwmon capabilities but I don't know whether there are native drivers
> >> available or not (but they could be "blacklisted"  preemptively like
> >> ASUS ATK).
> > I expect that ASUS only will interfere with specific hwmon driver(s)?
> 
> Yes only those used on their motherboards, and given the wide range of 
> motherboard ASUS produces and they offer the ATK0110 interface on almost all of 
> them, it pretty much comes down to "all hwmon and smbus drivers", although I'm 
> sure if we look we can find exceptions.
> 
> > So why don't you move the check into the hwmon driver and make it not
> > load on these systems?
> > It's still ugly, but IMO better than to pollute the ACPI core.
> > 
> 
> Because we do not want to do this in a zillion places when it should be done in 
> only one. The right solution is to make acpi_enforce_resources strict the 
> default, the auto setting is meant as a slow migration path to that right 
> solution, as doing that right now probably will cause lots of complaints.
> 
> Actually, the really right solution would be for the ACPI subsystem to actually 
> claim all the resources using the regular resource tracking so that drivers who 
> want to check for resource conflicts with ACPI do not have to explicitly call
> acpi_check_resource_conflict() (and friends), but instead will get an error 
> when trying to claim the resources from the regular resource system. However 
> chances are this will break things on quite a lot of systems, still it would be 
> the right thing to do in the end IMHO.
> 
> <snip>
> 
> > If this is an ASUS only problem with very specific thermal
> > sensors, better add the quirk at the specific sensor driver.
> > 
> 
> It is neither ASUS specific, not is it limited to specific hwmon IC's, these 
> are not thermal sensors, they are capable of monitoring far more then just 
> temperature, which is why the ACPI thermal device interface is insufficient.
> 
> > PS: I just realize that in ATK0110 the same thermal device
> > (IO port 0x22D) is used as in the generic ACPI device (_TMP).
> > So it looks like your asus thermal driver will not only interfere
> > with the hwmon driver, but with the ACPI thermal driver itself.
> 
> I would expect the ACPI code to take care of any conflicts between the various 
> interfaces it offers itself. And if it is unsafe to call multiple ACPI methods 
> at the same time, I would expect the ACPI core to fix this.

For the records, I second everything Hans wrote above.

> Also note that the above paragraph actually advocates in favour of making some 
> kind of change to acpi_enforce_resources. Proposal, what if we change the auto 
> setting to not only mean "strict" when an ATK0110 interface is present, but 
> also when the ACPI thermal zone interface is present ?

I expect problems if you do that without any exception. There are a
number of desktop motherboards out there with pretty broken TZ
implementations. To mention just one, my Jetway K8M8MS motherboard has a
TZ which reports 9 degrees C all the time. I disassembled the DSDT to
find out that the code is incorrect and they are reading a
configuration register instead of the temperature register. Apparently
customers didn't care at all, as this was never fixed. I reported to
Jetway but didn't get any answer.

More generally, the ACPI thermal zone doesn't provide a tenth of the
functionality of native hardware monitoring drivers on desktop and
server systems. So switching to strict when a thermal zone is found
would be as painful as switching to strict unconditionally, in that it
will cause a loss of functionality for many users out there. Whoever
does this will first have to become the i2c maintainer and the hwmon
maintainer, and then take all the flames. That won't be me for sure.

Thanks,
Thomas Renninger Jan. 30, 2009, 2:29 p.m. UTC | #7
On Thursday 29 January 2009 19:58:00 Hans de Goede wrote:
> Thomas Renninger wrote:
> >>> Either this should be more generic (instead of hardcoded "ATK0110"
> >>> device, use a list to add further specific thermal ACPI drivers). Hmm,
> >>> maybe it's only ASUS violating the spec?
> >>
> >> ASUS it's not actually violating any spec...
> >
> > They have to export the temperature through a thermal ACPI
> > device, not through the ASUS specific ATK0110 device. This is
> > what I mean whether there might be other vendor specific ACPI
> > devices violating the spec (by providing temperature read
> > functionality which should be done through the generic thermal
> > ACPI device).
>
> hwmon IC's monitor far more then just temperatures, ASUS is doing the right
> thing here by providing an ACPI interface to also read voltages and fan
> speeds, so that these can be read in a way that does not interfere with the
> ACPI code.
>
> And although even their interface does not expose the full functionality of
> the hwmon IC's, it is much much better then what the thermal ACPI code
> gives us.
>
> >>> Thinkpads seem to read out extra thermal
> >>> data from EC and shouldn't interfere with hwmon drivers.
> >>
> >> The point is that there is only 1 physical sensor, which both ACPI and
> >> a native driver want to drive; transaction on SMBus to read the sensor
> >> are usually in the form "select bank" + "read" and the sequence is
> >> *not* atomic. In ASUS case the IO ports are correctly reserved by the
> >> firmware, but (historically) this wasn't taken into account.
> >
> > I know about this problem.
> >
> >> Aside from ASUS the problem is generic since there are two drivers
> >> poking at the same hardware; for example there are reports of native
> >> drivers interfering with normal TZ polling (see [1])
> >
> > Yes, this is even worse as the check that you want to add will not
> > catch it.
>
> Ack, so the proper solution would be to just make the
> acpi_enforce_resources strict by default, but this may cause lack of
> functionality for some user who are currently using native drivers for
> hwmon features. Which is why I (hwmon subsytem contributor and Jean Delvare
> (i2c and (ex)hwmon subsystem maintainer) proposed this auto setting as a
> compromise. I'm fine with changing the default to strict, AFAIK Jean
> prefers the auto setting.
>
> <snip>
>
> >> At least eeepc and (some?) thinkpads have ACPI
> >> hwmon capabilities but I don't know whether there are native drivers
> >> available or not (but they could be "blacklisted"  preemptively like
> >> ASUS ATK).
> >
> > I expect that ASUS only will interfere with specific hwmon driver(s)?
>
> Yes only those used on their motherboards, and given the wide range of
> motherboard ASUS produces and they offer the ATK0110 interface on almost
> all of them, it pretty much comes down to "all hwmon and smbus drivers",
> although I'm sure if we look we can find exceptions.
I thought it may only be one specific hwmon driver which could
interfere on ASUS boards with the ATK0110 device. I agree that
it would not make sense to cluster all hwmon drivers.

The problem is similar to the video backlight switching problem
(legacy drivers vs preferred generic ACPI video driver) where
you also must know which driver to take before module loading time.

I hoped to be able to come up with something more clever or say a nicer
solution or at least a short discussion. But I also have no better idea.
Maybe all such code should end up in a drivers/acpi/quirks.c file
at some time, similar to other places in the kernel.

I think it's time for Len looking at this. I wonder whether he will take
this patch or why he won't.
This is an ugly problem which unfortunately needs an ugly fix/check.

This code snippet should be part of your ATK0110 driver then and
not submitted/committed standalone?

Please take me into CC the next time you submit your asus-laptop/acpi driver, 
I like to have a look at it and can also give it a test.

> > So why don't you move the check into the hwmon driver and make it not
> > load on these systems?
> > It's still ugly, but IMO better than to pollute the ACPI core.
>
> Because we do not want to do this in a zillion places when it should be
> done in only one. The right solution is to make acpi_enforce_resources
> strict the default, the auto setting is meant as a slow migration path to
> that right solution, as doing that right now probably will cause lots of
> complaints.


> Actually, the really right solution would be for the ACPI subsystem to
> actually claim all the resources using the regular resource tracking so
> that drivers who want to check for resource conflicts with ACPI do not have
> to explicitly call acpi_check_resource_conflict() (and friends), but
> instead will get an error when trying to claim the resources from the
> regular resource system. However chances are this will break things on
> quite a lot of systems, still it would be the right thing to do in the end
> IMHO.
>
> <snip>
>
> > If this is an ASUS only problem with very specific thermal
> > sensors, better add the quirk at the specific sensor driver.
>
> It is neither ASUS specific, not is it limited to specific hwmon IC's,
> these are not thermal sensors, they are capable of monitoring far more then
> just temperature, which is why the ACPI thermal device interface is
> insufficient.
>
> > PS: I just realize that in ATK0110 the same thermal device
> > (IO port 0x22D) is used as in the generic ACPI device (_TMP).
> > So it looks like your asus thermal driver will not only interfere
> > with the hwmon driver, but with the ACPI thermal driver itself.
>
> I would expect the ACPI code to take care of any conflicts between the
> various interfaces it offers itself. And if it is unsafe to call multiple
> ACPI methods at the same time, I would expect the ACPI core to fix this.
Yes. ACPI can easily make sure that not several resources are accessed at
the same time. It's the ACPI vs native OS code where this is so hard.

    Thomas

> Also note that the above paragraph actually advocates in favour of making
> some kind of change to acpi_enforce_resources. Proposal, what if we change
> the auto setting to not only mean "strict" when an ATK0110 interface is
> present, but also when the ACPI thermal zone interface is present ?
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Len Brown Feb. 4, 2009, 5:52 a.m. UTC | #8
On Thu, 29 Jan 2009, Luca Tettamanti wrote:

> Aside from ASUS the problem is generic since there are two drivers
> poking at the same hardware; for example there are reports of native
> drivers interfering with normal TZ polling (see [1]).

FYI,

While it is slightly off-topic of the (I agree, real)
technical issue here, note that polling is not "normal" on ACPI systems.
[1] was on  SuSE Linux 10.0, which on their own decided to
over-ride the kernel and enable thermal zone polling by default.

This turned out to be a bad idea because it exposed BIOS bugs
in ACPI thermal zones that were never exposed on other operating
systems - evidently because the other operating systems never 
poll thermal zones, they are entirely event driven.

cheers,
Len Brown, Intel Open Source Technology Center

> Luca
> [1] http://www.lm-sensors.org/ticket/2072 and:
> http://thread.gmane.org/gmane.linux.drivers.sensors/12359

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matthew Garrett Feb. 4, 2009, 6:05 a.m. UTC | #9
On Wed, Feb 04, 2009 at 12:52:06AM -0500, Len Brown wrote:

> While it is slightly off-topic of the (I agree, real)
> technical issue here, note that polling is not "normal" on ACPI systems.
> [1] was on  SuSE Linux 10.0, which on their own decided to
> over-ride the kernel and enable thermal zone polling by default.

Checking the DSDTs I have to hand, it seems that polling is expected on 
about 5% of systems via an explicit _TZP and on almost all machines via 
_TSP. Even on systems where thermal notifications are provided, it's 
still up to the OS to poll the zone to find the current temperature and 
take appropriate action. There's still a window for native smbus drivers 
to screw everything up.
Hans de Goede Feb. 4, 2009, 8:37 a.m. UTC | #10
Matthew Garrett wrote:
> On Wed, Feb 04, 2009 at 12:52:06AM -0500, Len Brown wrote:
> 
>> While it is slightly off-topic of the (I agree, real)
>> technical issue here, note that polling is not "normal" on ACPI systems.
>> [1] was on  SuSE Linux 10.0, which on their own decided to
>> over-ride the kernel and enable thermal zone polling by default.
> 
> Checking the DSDTs I have to hand, it seems that polling is expected on 
> about 5% of systems via an explicit _TZP and on almost all machines via 
> _TSP. Even on systems where thermal notifications are provided, it's 
> still up to the OS to poll the zone to find the current temperature and 
> take appropriate action. There's still a window for native smbus drivers 
> to screw everything up.
> 

Note that this not only applies to smbus devices but also to superio devices, 
in general these superio hwmon devices use 2 isa ports an index and a data one, 
image they mayhem which could happen if for example:
native driver sets index
acpi driver sets index
acpi driver reads data
native driver writes data (to a completely wrong register)

We *really* need to be fixing this.

Len, Matthew, what is you opinion of the proposed auto setting for 
acpi_enforce_resources, which is meant to mean strict on known problematic 
systems and lax on others?

Not I'm not asking what you think of the code (yet) just what you think of the 
principle. If we can atleast all agree on this as a compromise not breaking 
hwmon on quite a few systems (the strict setting) while stile providing 
something safer then the current lax, then I'm sure we can hash out any code 
problems soon enough.

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matthew Garrett Feb. 4, 2009, 1:17 p.m. UTC | #11
On Wed, Feb 04, 2009 at 09:37:51AM +0100, Hans de Goede wrote:

> Len, Matthew, what is you opinion of the proposed auto setting for 
> acpi_enforce_resources, which is meant to mean strict on known problematic 
> systems and lax on others?

Personally, I'd rather that it was "strict" on everything. We might 
break some existing setups, but they're already working mostly by luck.
Jean Delvare Feb. 4, 2009, 1:26 p.m. UTC | #12
Hi Matthew,

On Wed, 4 Feb 2009 13:17:09 +0000, Matthew Garrett wrote:
> On Wed, Feb 04, 2009 at 09:37:51AM +0100, Hans de Goede wrote:
> 
> > Len, Matthew, what is you opinion of the proposed auto setting for 
> > acpi_enforce_resources, which is meant to mean strict on known problematic 
> > systems and lax on others?
> 
> Personally, I'd rather that it was "strict" on everything. We might 
> break some existing setups, but they're already working mostly by luck.

Are you the new hwmon and i2c subsystems maintainer and I wasn't aware
of it?
Matthew Garrett Feb. 4, 2009, 2:20 p.m. UTC | #13
On Wed, Feb 04, 2009 at 02:26:06PM +0100, Jean Delvare wrote:
> On Wed, 4 Feb 2009 13:17:09 +0000, Matthew Garrett wrote:
> > Personally, I'd rather that it was "strict" on everything. We might 
> > break some existing setups, but they're already working mostly by luck.
> 
> Are you the new hwmon and i2c subsystems maintainer and I wasn't aware
> of it?

If you've got some programmatic way to tell the difference between safe 
and dangerous reuse of ACPI resources then that would obviously be 
preferable, but I doubt that's practical. auto is a compromise that 
avoids one specific case of breakage, but it does nothing to protect us 
on the majority of systems. Allowing the firmware and the OS to attempt 
to access the same hardware without any locking is an invitation for 
disaster, and in the absence of any way to prevent the firmware from 
doing it...

But. Hans asked for my opinion - the maintainer's is obviously more 
relevant.
Jean Delvare Feb. 10, 2009, 1:57 p.m. UTC | #14
Hi Matthew,

Sorry for the late answer.

On Wed, 4 Feb 2009 14:20:15 +0000, Matthew Garrett wrote:
> On Wed, Feb 04, 2009 at 02:26:06PM +0100, Jean Delvare wrote:
> > On Wed, 4 Feb 2009 13:17:09 +0000, Matthew Garrett wrote:
> > > Personally, I'd rather that it was "strict" on everything. We might 
> > > break some existing setups, but they're already working mostly by luck.
> > 
> > Are you the new hwmon and i2c subsystems maintainer and I wasn't aware
> > of it?
> 
> If you've got some programmatic way to tell the difference between safe 
> and dangerous reuse of ACPI resources then that would obviously be 
> preferable, but I doubt that's practical.

I wish we had such a way, but I don't know of any. If someone can think
of one, that would be someone who knows ACPI much better than I do.

> auto is a compromise that 
> avoids one specific case of breakage, but it does nothing to protect us 
> on the majority of systems. Allowing the firmware and the OS to attempt 
> to access the same hardware without any locking is an invitation for 
> disaster, and in the absence of any way to prevent the firmware from 
> doing it...

In theory you are, of course, perfectly right. The question is, how do
we get there without making people angry because of the regression? I
had yet another example a few days ago:

http://lists.lm-sensors.org/pipermail/lm-sensors/2009-February/025297.html

That system implements an ACPI thermal zone, which apparently reads its
values from the second temperature channel of the IT8716F Super-I/O
chip. This channels happens to be broken (not sure why but that's not
really the point) and returns a wrong temperature value.

The same chip can be driven by our native it87 driver, which, on this
specific board, provides support for 9 voltages, 3 fans, and 1 working
temperature. Do we really have to tell the user to not use the it87
driver and instead use the ACPI thermal driver "because that's what the
firmware wants"?

In this case, I really would like to be able to blacklist the ACPI
thermal device, and let the it87 driver handle the device. If we can
have such a blacklist in the kernel, then I am fine switching the
resource conflict check to "strict" by default. But is is possible? I
would like the ACPI people to tell me. Basically the logic would be as
follows:

if (not laptop) {
	if (ACPI thermal zone defined
	 && no custom ACPI code dealing with thermal conditions) {
		disable ACPI thermal zone and free its resources for native driver
	}
}

But I guess there is no way to know what exactly the ACPI thermal zone
is doing, except by looking at the DSDT, so this can't be automated?

Is it at least possible to disable the ACPI thermal zone either as a
command-line parameter or an internal blacklist?

> But. Hans asked for my opinion - the maintainer's is obviously more 
> relevant.

Well, officially I am not the maintainer of the hwmon subsystem, only
whose of the i2c subsystem. But the fact is that the resource conflict
checking policy affects both the same way. That being said, I'm not
sure how worth my opinion (about this specific matter) is these days...

What I want you (and everyone) to realize is that just switching the
default checking level to strict tomorrow isn't going to work. If we
want the default to be strict then we will have to add a number of
mechanisms to let the user override this in a way which is as easy and
transparent as possible for the user.

One approach that may work is to change the default based on the ACPI
implementation year (I think the info is available, right?) We could
default to strict for systems with year >= 2009. This may still prevent
users from getting the best out of their system, but at least won't
cause a regression for users of older systems where the native driver
has been used so far. I know it's not an ideal solution, but ACPI
implementations aren't ideal either.
Matthew Garrett Feb. 10, 2009, 2:08 p.m. UTC | #15
On Tue, Feb 10, 2009 at 02:57:16PM +0100, Jean Delvare wrote:

> In theory you are, of course, perfectly right. The question is, how do
> we get there without making people angry because of the regression? 

The only thing we can do is add a printk that informs users that passing 
a boot argument will allow them to use the drivers as they used to.

> The same chip can be driven by our native it87 driver, which, on this
> specific board, provides support for 9 voltages, 3 fans, and 1 working
> temperature. Do we really have to tell the user to not use the it87
> driver and instead use the ACPI thermal driver "because that's what the
> firmware wants"?

It's valid (if dumb) for vendors to design their platforms such that 
enabling ACPI and then not using the thermal code may result in hardware 
damage. We have no way of determining that in advance, so all we can do 
is tell the user that they can pass an argument if they know it's safe 
to do so.

> But I guess there is no way to know what exactly the ACPI thermal zone
> is doing, except by looking at the DSDT, so this can't be automated?

Correct.

> Is it at least possible to disable the ACPI thermal zone either as a
> command-line parameter or an internal blacklist?

It's possible, and we could certainly add an argument to do so. However, 
removing support for the kernel use of the thermal zone doesn't prevent 
the firmware from making calls to the thermal code itself. There's no 
real way we can block that.

> One approach that may work is to change the default based on the ACPI
> implementation year (I think the info is available, right?) We could
> default to strict for systems with year >= 2009. This may still prevent
> users from getting the best out of their system, but at least won't
> cause a regression for users of older systems where the native driver
> has been used so far. I know it's not an ideal solution, but ACPI
> implementations aren't ideal either.

The problem with this approach is that we still end up with a large 
number of malfunctioning machines. Really, I don't think there's any way 
to handle this other than defaulting to strict, letting the default be 
changed at run and boot time and printing a message when a driver is 
refused permission to bind. Distributions that want to obtain the 
previous behaviour can change the default back.
Hans de Goede Feb. 10, 2009, 3:32 p.m. UTC | #16
Matthew Garrett wrote:
> On Tue, Feb 10, 2009 at 02:57:16PM +0100, Jean Delvare wrote:
> 
  year (I think the info is available, right?) We could
>> default to strict for systems with year >= 2009. This may still prevent
>> users from getting the best out of their system, but at least won't
>> cause a regression for users of older systems where the native driver
>> has been used so far. I know it's not an ideal solution, but ACPI
>> implementations aren't ideal either.
> 
> The problem with this approach is that we still end up with a large 
> number of malfunctioning machines. Really, I don't think there's any way 
> to handle this other than defaulting to strict, letting the default be 
> changed at run and boot time and printing a message when a driver is 
> refused permission to bind. Distributions that want to obtain the 
> previous behaviour can change the default back.
> 

For the record we have changed the default to strict in Fedora's 
development branch, for 2 weeks or so now, including in the recently 
released Fedora 11 release and we've had 0 complaints so far.

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jean Delvare Feb. 10, 2009, 4:24 p.m. UTC | #17
On Tue, 10 Feb 2009 16:32:24 +0100, Hans de Goede wrote:
> Matthew Garrett wrote:
> > On Tue, Feb 10, 2009 at 02:57:16PM +0100, Jean Delvare wrote:
> > 
>   year (I think the info is available, right?) We could
> >> default to strict for systems with year >= 2009. This may still prevent
> >> users from getting the best out of their system, but at least won't
> >> cause a regression for users of older systems where the native driver
> >> has been used so far. I know it's not an ideal solution, but ACPI
> >> implementations aren't ideal either.
> > 
> > The problem with this approach is that we still end up with a large 
> > number of malfunctioning machines. Really, I don't think there's any way 
> > to handle this other than defaulting to strict, letting the default be 
> > changed at run and boot time and printing a message when a driver is 
> > refused permission to bind. Distributions that want to obtain the 
> > previous behaviour can change the default back.

If we expect different distributions / user classes to set a different
default, then it might make sense to make acpi_enforce_resources's
default value a config option?

> For the record we have changed the default to strict in Fedora's 
> development branch, for 2 weeks or so now, including in the recently 
> released Fedora 11 release and we've had 0 complaints so far.

Well, if the number of affected systems is small, this is good news.
But this is only 2 weeks and one distribution, coverage isn't
sufficient to claim anything yet IMHO.

That being said... if there's a common consensus that switching to
strict and dealing with fallouts is the best thing to do, and I'm the
only one objecting to this, then I am ready to admit that I was wrong
and let you proceed.
Jean Delvare Feb. 12, 2009, 12:44 p.m. UTC | #18
On Tue, 10 Feb 2009 14:08:29 +0000, Matthew Garrett wrote:
> On Tue, Feb 10, 2009 at 02:57:16PM +0100, Jean Delvare wrote:
> 
> > In theory you are, of course, perfectly right. The question is, how do
> > we get there without making people angry because of the regression? 
> 
> The only thing we can do is add a printk that informs users that passing 
> a boot argument will allow them to use the drivers as they used to.

Good point.

> > The same chip can be driven by our native it87 driver, which, on this
> > specific board, provides support for 9 voltages, 3 fans, and 1 working
> > temperature. Do we really have to tell the user to not use the it87
> > driver and instead use the ACPI thermal driver "because that's what the
> > firmware wants"?
> 
> It's valid (if dumb) for vendors to design their platforms such that 
> enabling ACPI and then not using the thermal code may result in hardware 
> damage. We have no way of determining that in advance, so all we can do 
> is tell the user that they can pass an argument if they know it's safe 
> to do so.

OK, I understand.

> > But I guess there is no way to know what exactly the ACPI thermal zone
> > is doing, except by looking at the DSDT, so this can't be automated?
> 
> Correct.
> 
> > Is it at least possible to disable the ACPI thermal zone either as a
> > command-line parameter or an internal blacklist?
> 
> It's possible, and we could certainly add an argument to do so. However, 
> removing support for the kernel use of the thermal zone doesn't prevent 
> the firmware from making calls to the thermal code itself. There's no 
> real way we can block that.
> 
> > One approach that may work is to change the default based on the ACPI
> > implementation year (I think the info is available, right?) We could
> > default to strict for systems with year >= 2009. This may still prevent
> > users from getting the best out of their system, but at least won't
> > cause a regression for users of older systems where the native driver
> > has been used so far. I know it's not an ideal solution, but ACPI
> > implementations aren't ideal either.
> 
> The problem with this approach is that we still end up with a large 
> number of malfunctioning machines.

Well, that's what we have at the moment and the world didn't end.
Enabling strict checks for a subset of machines is always an
improvement compared to the current situation.

> Really, I don't think there's any way 
> to handle this other than defaulting to strict, letting the default be 
> changed at run and boot time and printing a message when a driver is 
> refused permission to bind. Distributions that want to obtain the 
> previous behaviour can change the default back.

Anyway, as I already wrote elsewhere in this thread, I no longer object
to the change you propose. I won't instigate it, but if it happens,
and care is taken to address the foreseeable downfalls, fine with me.

Thanks,
Pavel Machek Feb. 27, 2009, 1:27 p.m. UTC | #19
Hi!

> > For the record we have changed the default to strict in Fedora's 
> > development branch, for 2 weeks or so now, including in the recently 
> > released Fedora 11 release and we've had 0 complaints so far.
> 
> Well, if the number of affected systems is small, this is good news.
> But this is only 2 weeks and one distribution, coverage isn't
> sufficient to claim anything yet IMHO.
> 
> That being said... if there's a common consensus that switching to
> strict and dealing with fallouts is the best thing to do, and I'm the
> only one objecting to this, then I am ready to admit that I was wrong
> and let you proceed.

I believe that 'enable strict, deal with fallout' is the best
long-term strategy...
									Pavel
Luca Tettamanti March 24, 2009, 12:39 p.m. UTC | #20
On Fri, Feb 27, 2009 at 2:27 PM, Pavel Machek <pavel@ucw.cz> wrote:
> Hi!
>
>> > For the record we have changed the default to strict in Fedora's
>> > development branch, for 2 weeks or so now, including in the recently
>> > released Fedora 11 release and we've had 0 complaints so far.
>>
>> Well, if the number of affected systems is small, this is good news.
>> But this is only 2 weeks and one distribution, coverage isn't
>> sufficient to claim anything yet IMHO.
>>
>> That being said... if there's a common consensus that switching to
>> strict and dealing with fallouts is the best thing to do, and I'm the
>> only one objecting to this, then I am ready to admit that I was wrong
>> and let you proceed.
>
> I believe that 'enable strict, deal with fallout' is the best
> long-term strategy...

Hello,
the merge window for .30 is now open, what are we going to do with this issue?

Luca
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans de Goede March 24, 2009, 1:21 p.m. UTC | #21
On 03/24/2009 01:39 PM, Luca Tettamanti wrote:
> On Fri, Feb 27, 2009 at 2:27 PM, Pavel Machek<pavel@ucw.cz>  wrote:
>> Hi!
>>
>>>> For the record we have changed the default to strict in Fedora's
>>>> development branch, for 2 weeks or so now, including in the recently
>>>> released Fedora 11 release and we've had 0 complaints so far.
>>> Well, if the number of affected systems is small, this is good news.
>>> But this is only 2 weeks and one distribution, coverage isn't
>>> sufficient to claim anything yet IMHO.
>>>
>>> That being said... if there's a common consensus that switching to
>>> strict and dealing with fallouts is the best thing to do, and I'm the
>>> only one objecting to this, then I am ready to admit that I was wrong
>>> and let you proceed.
>> I believe that 'enable strict, deal with fallout' is the best
>> long-term strategy...
>
> Hello,
> the merge window for .30 is now open, what are we going to do with this issue?
>

I think the consensus was to make the default strict and to merge the atk0110
driver, right?

Note that we've been running this setup in Fedora kernels for quite a while now,
and have had only one bug report, which was solved by simply explaining why this
was done.

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jean Delvare March 24, 2009, 1:43 p.m. UTC | #22
Hi Hans, Luca,

On Tue, 24 Mar 2009 14:21:21 +0100, Hans de Goede wrote:
> On 03/24/2009 01:39 PM, Luca Tettamanti wrote:
> > the merge window for .30 is now open, what are we going to do with this issue?
> 
> I think the consensus was to make the default strict and to merge the atk0110
> driver, right?

Yes.

> Note that we've been running this setup in Fedora kernels for quite a while now,
> and have had only one bug report, which was solved by simply explaining why this
> was done.

Maybe the explanation in question could be added somewhere, either in
the help text of CONFIG_HWMON, or somewhere under Documentation/, or on
the lm-sensors.org wiki? I expect the question to be asked more
frequently once the default changes upstream, and I would like avoiding
wasting developer time replying again and again. Having the possibility
to point users to a clear explanation would be very pleasant.
Hans de Goede March 24, 2009, 2:29 p.m. UTC | #23
On 03/24/2009 02:43 PM, Jean Delvare wrote:
> Hi Hans, Luca,
>
> On Tue, 24 Mar 2009 14:21:21 +0100, Hans de Goede wrote:
>> On 03/24/2009 01:39 PM, Luca Tettamanti wrote:
>>> the merge window for .30 is now open, what are we going to do with this issue?
>> I think the consensus was to make the default strict and to merge the atk0110
>> driver, right?
>
> Yes.
>
>> Note that we've been running this setup in Fedora kernels for quite a while now,
>> and have had only one bug report, which was solved by simply explaining why this
>> was done.
>
> Maybe the explanation in question could be added somewhere, either in
> the help text of CONFIG_HWMON, or somewhere under Documentation/, or on
> the lm-sensors.org wiki?

I think having an explanation somewhere would be great, not sure if the explanation
in question was all that great though (and it is burried in between a gazillion other
kernel bugs, so a bit hard to find).

Regards,

Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Len Brown April 2, 2009, 10:45 p.m. UTC | #24
On Wed, 4 Feb 2009, Matthew Garrett wrote:

> On Wed, Feb 04, 2009 at 12:52:06AM -0500, Len Brown wrote:
> 
> > While it is slightly off-topic of the (I agree, real)
> > technical issue here, note that polling is not "normal" on ACPI systems.
> > [1] was on  SuSE Linux 10.0, which on their own decided to
> > over-ride the kernel and enable thermal zone polling by default.
> 
> Checking the DSDTs I have to hand, it seems that polling is expected on 
> about 5% of systems via an explicit _TZP and on almost all machines via 
> _TSP. Even on systems where thermal notifications are provided, it's 
> still up to the OS to poll the zone to find the current temperature and 
> take appropriate action. There's still a window for native smbus drivers 
> to screw everything up.

FWIW

In the last 6 years, I've seen exactly 3 systems with a non-zero _TZP.
An old Averatec laptop asked for 1 second, and two recent EEE-PC's ask for 
30 seconds.  Dunno why Asus has made this leap backwards.

_TSP is a different beast.  It only exists in the context of _PSV and 
_PSL.

-Len

ps. I just noticed something in the spec under _PSL...
"If a linear performance control register is not defined(..) for a 
processor defined in _PSL or for a processor device in the zone as 
indicated
by _TZM, then the processor must support processor performance states
(in other words, the processor's processor object must include _PCT, _PSS, 
and _PPC).

Interesting, here is an official tie in of P-states and passive trip 
points that I'd not noticed before....


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: b/Documentation/kernel-parameters.txt
===================================================================
--- a/Documentation/kernel-parameters.txt	2009-01-17 21:22:49.218882286 +0100
+++ b/Documentation/kernel-parameters.txt	2009-01-21 23:25:41.262478018 +0100
@@ -258,6 +258,25 @@ 
 			to assume that this machine's pmtimer latches its value
 			and always returns good values.
 
+	acpi_enforce_resources=	[ACPI]
+			{ strict, auto, lax, no }
+			Check for resource conflicts between native drivers
+			and ACPI OperationRegions (SystemIO and System Memory
+			only). IO ports and memory declared in ACPI might be
+			used by the ACPI subsystem in arbitrary AML code and
+			can interfere with legacy drivers.
+			strict: access to resources claimed by ACPI is denied;
+			legacy drivers trying to access reserved resources
+			will fail to load.
+			auto (default): try and detect ACPI devices with known
+			ACPI drivers; escalates the setting to "strict" if such
+			a device is found, otherwise behaves like "lax".
+			lax: access to resources claimed by ACPI is allowed;
+			legacy drivers trying to access reserved resources
+			will load and a warning message is logged.
+			no: ACPI OperationRegions are not marked as reserved,
+			no further checks are performed.
+
 	agp=		[AGP]
 			{ off | try_unsupported }
 			off: disable AGP support
Index: b/drivers/acpi/osl.c
===================================================================
--- a/drivers/acpi/osl.c	2009-01-17 21:22:49.190882303 +0100
+++ b/drivers/acpi/osl.c	2009-01-25 22:04:30.759209000 +0100
@@ -1063,7 +1063,10 @@ 
  * in arbitrary AML code and can interfere with legacy drivers.
  * acpi_enforce_resources= can be set to:
  *
- *   - strict           (2)
+ *   - auto             (2)
+ *     -> detect possible conflicts with ACPI drivers and switch to
+ *     strict if needed, otherwise act like lax
+ *   - strict           (3)
  *     -> further driver trying to access the resources will not load
  *   - lax (default)    (1)
  *     -> further driver trying to access the resources will load, but you
@@ -1073,11 +1076,12 @@ 
  *     -> ACPI Operation Region resources will not be registered
  *
  */
-#define ENFORCE_RESOURCES_STRICT 2
+#define ENFORCE_RESOURCES_STRICT 3
+#define ENFORCE_RESOURCES_AUTO   2
 #define ENFORCE_RESOURCES_LAX    1
 #define ENFORCE_RESOURCES_NO     0
 
-static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
+static unsigned int acpi_enforce_resources = ENFORCE_RESOURCES_AUTO;
 
 static int __init acpi_enforce_resources_setup(char *str)
 {
@@ -1086,6 +1090,8 @@ 
 
 	if (!strcmp("strict", str))
 		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
+	else if (!strcmp("auto", str))
+		acpi_enforce_resources = ENFORCE_RESOURCES_AUTO;
 	else if (!strcmp("lax", str))
 		acpi_enforce_resources = ENFORCE_RESOURCES_LAX;
 	else if (!strcmp("no", str))
@@ -1096,6 +1102,35 @@ 
 
 __setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
 
+static acpi_status acpi_detect_asus_atk_cb(acpi_handle obj_handle,
+		u32 nesting_level, void *context, void **return_value)
+{
+	*((bool *)return_value) = true;
+	return AE_CTRL_TERMINATE;
+}
+
+static int __init acpi_detect_asus_atk(void)
+{
+	acpi_status ret;
+	bool found = false;
+
+	if (acpi_enforce_resources != ENFORCE_RESOURCES_AUTO)
+		return 0;
+
+	ret = acpi_get_devices("ATK0110", acpi_detect_asus_atk_cb,
+			NULL, (void **)&found);
+
+	if (ret == AE_OK && found) {
+		printk(KERN_DEBUG "Asus ATK0110 interface detected: "
+				"enforcing resource checking.\n");
+		acpi_enforce_resources = ENFORCE_RESOURCES_STRICT;
+	} else
+		acpi_enforce_resources =  ENFORCE_RESOURCES_LAX;
+
+	return 0;
+}
+fs_initcall(acpi_detect_asus_atk);
+
 /* Check for resource conflicts between ACPI OperationRegions and native
  * drivers */
 int acpi_check_resource_conflict(struct resource *res)