diff mbox series

ACPI: Fix selecting the wrong ACPI fwnode for the iGPU on some Dell laptops

Message ID 20230109205721.60694-1-hdegoede@redhat.com (mailing list archive)
State New, archived
Headers show
Series ACPI: Fix selecting the wrong ACPI fwnode for the iGPU on some Dell laptops | expand

Commit Message

Hans de Goede Jan. 9, 2023, 8:57 p.m. UTC
The Dell Latitude E6430 both with and without the optional NVidia dGPU
has a bug in its ACPI tables which is causing Linux to assign the wrong
ACPI fwnode / companion to the pci_device for the i915 iGPU.

Specifically under the PCI root bridge there are these 2 ACPI Device()s :

 Scope (_SB.PCI0)
 {
     Device (GFX0)
     {
         Name (_ADR, 0x00020000)  // _ADR: Address
     }

     ...

     Device (VID)
     {
         Name (_ADR, 0x00020000)  // _ADR: Address
         ...

         Method (_DOS, 1, NotSerialized)  // _DOS: Disable Output Switching
         {
             VDP8 = Arg0
             VDP1 (One, VDP8)
         }

         Method (_DOD, 0, NotSerialized)  // _DOD: Display Output Devices
         {
             ...
         }
         ...
     }
 }

The non-functional GFX0 ACPI device is a problem, because this gets
returned as ACPI companion-device by acpi_find_child_device() for the iGPU.

This is a long standing problem and the i915 driver does use the ACPI
companion for some things, but works fine without it.

However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device
and that is set on the wrong acpi_device because of the wrong
acpi_find_child_device() return. This breaks the ACPI video code, leading
to non working backlight control in some cases.

Make find_child_checks() return a higher score for children which have
pnp-ids set by various scan helpers like acpi_is_video_device(), so
that it picks the right companion-device.

An alternative approach would be to directly call acpi_is_video_device()
from find_child_checks() but that would be somewhat computationally
expensive given that acpi_find_child_device() iterates over all the
PCI0 children every time it is called.

Fixes: 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/glue.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Comments

Hans de Goede Jan. 9, 2023, 9:14 p.m. UTC | #1
p.s.

This fixes a regression in 6.1, adding the regressions list to the Cc.

Once we figure out the best way to fix this (this patch is more of a proposal
how to fix this rather then a definitive fix), we should also backport
the fix to 6.1.y.


On 1/9/23 21:57, Hans de Goede wrote:
> The Dell Latitude E6430 both with and without the optional NVidia dGPU
> has a bug in its ACPI tables which is causing Linux to assign the wrong
> ACPI fwnode / companion to the pci_device for the i915 iGPU.
> 
> Specifically under the PCI root bridge there are these 2 ACPI Device()s :
> 
>  Scope (_SB.PCI0)
>  {
>      Device (GFX0)
>      {
>          Name (_ADR, 0x00020000)  // _ADR: Address
>      }
> 
>      ...
> 
>      Device (VID)
>      {
>          Name (_ADR, 0x00020000)  // _ADR: Address
>          ...
> 
>          Method (_DOS, 1, NotSerialized)  // _DOS: Disable Output Switching
>          {
>              VDP8 = Arg0
>              VDP1 (One, VDP8)
>          }
> 
>          Method (_DOD, 0, NotSerialized)  // _DOD: Display Output Devices
>          {
>              ...
>          }
>          ...
>      }
>  }
> 
> The non-functional GFX0 ACPI device is a problem, because this gets
> returned as ACPI companion-device by acpi_find_child_device() for the iGPU.
> 
> This is a long standing problem and the i915 driver does use the ACPI
> companion for some things, but works fine without it.
> 
> However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
> acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device
> and that is set on the wrong acpi_device because of the wrong
> acpi_find_child_device() return. This breaks the ACPI video code, leading
> to non working backlight control in some cases.
> 
> Make find_child_checks() return a higher score for children which have
> pnp-ids set by various scan helpers like acpi_is_video_device(), so
> that it picks the right companion-device.
> 
> An alternative approach would be to directly call acpi_is_video_device()
> from find_child_checks() but that would be somewhat computationally
> expensive given that acpi_find_child_device() iterates over all the
> PCI0 children every time it is called.
> 
> Fixes: 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> ---
>  drivers/acpi/glue.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
> index 204fe94c7e45..2055dfd7678b 100644
> --- a/drivers/acpi/glue.c
> +++ b/drivers/acpi/glue.c
> @@ -75,7 +75,7 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
>  }
>  
>  #define FIND_CHILD_MIN_SCORE	1
> -#define FIND_CHILD_MAX_SCORE	2
> +#define FIND_CHILD_MAX_SCORE	3
>  
>  static int match_any(struct acpi_device *adev, void *not_used)
>  {
> @@ -89,15 +89,25 @@ static bool acpi_dev_has_children(struct acpi_device *adev)
>  
>  static int find_child_checks(struct acpi_device *adev, bool check_children)
>  {
> +	int score = FIND_CHILD_MIN_SCORE;
>  	unsigned long long sta;
>  	acpi_status status;
>  
>  	if (check_children && !acpi_dev_has_children(adev))
>  		return -ENODEV;
>  
> +	/*
> +	 * For devices without a _STA method, prefer devices without a _HID
> +	 * (which conflicts with having an _ADR) but which have been matched
> +	 * in some other way, like e.g. by acpi_is_video_device() over devices
> +	 * with no ids at all.
> +	 */
> +	if (!adev->pnp.type.platform_id && adev->pnp.type.hardware_id)
> +		score++;
> +
>  	status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
>  	if (status == AE_NOT_FOUND)
> -		return FIND_CHILD_MIN_SCORE;
> +		return score;
>  
>  	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
>  		return -ENODEV;
Rafael J. Wysocki Jan. 10, 2023, 1:33 p.m. UTC | #2
On Monday, January 9, 2023 9:57:21 PM CET Hans de Goede wrote:
> The Dell Latitude E6430 both with and without the optional NVidia dGPU
> has a bug in its ACPI tables which is causing Linux to assign the wrong
> ACPI fwnode / companion to the pci_device for the i915 iGPU.
> 
> Specifically under the PCI root bridge there are these 2 ACPI Device()s :
> 
>  Scope (_SB.PCI0)
>  {
>      Device (GFX0)
>      {
>          Name (_ADR, 0x00020000)  // _ADR: Address
>      }
> 
>      ...
> 
>      Device (VID)
>      {
>          Name (_ADR, 0x00020000)  // _ADR: Address
>          ...
> 
>          Method (_DOS, 1, NotSerialized)  // _DOS: Disable Output Switching
>          {
>              VDP8 = Arg0
>              VDP1 (One, VDP8)
>          }
> 
>          Method (_DOD, 0, NotSerialized)  // _DOD: Display Output Devices
>          {
>              ...
>          }
>          ...
>      }
>  }
> 
> The non-functional GFX0 ACPI device is a problem, because this gets
> returned as ACPI companion-device by acpi_find_child_device() for the iGPU.
> 
> This is a long standing problem and the i915 driver does use the ACPI
> companion for some things, but works fine without it.
> 
> However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
> acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device
> and that is set on the wrong acpi_device because of the wrong
> acpi_find_child_device() return. This breaks the ACPI video code, leading
> to non working backlight control in some cases.

Interesting.  Sorry for the trouble.

> Make find_child_checks() return a higher score for children which have
> pnp-ids set by various scan helpers like acpi_is_video_device(), so
> that it picks the right companion-device.

This has a potential of changing the behavior in some cases that are not
relevant here which is generally risky.

> An alternative approach would be to directly call acpi_is_video_device()
> from find_child_checks() but that would be somewhat computationally
> expensive given that acpi_find_child_device() iterates over all the
> PCI0 children every time it is called.

I agree with the above, but my fix would be something like the patch below (not
really tested, but it builds).

---
 drivers/acpi/glue.c     |   14 ++++++++++++--
 drivers/acpi/scan.c     |    7 +++++--
 include/acpi/acpi_bus.h |    3 ++-
 3 files changed, 19 insertions(+), 5 deletions(-)

Index: linux-pm/include/acpi/acpi_bus.h
===================================================================
--- linux-pm.orig/include/acpi/acpi_bus.h
+++ linux-pm/include/acpi/acpi_bus.h
@@ -230,7 +230,8 @@ struct acpi_pnp_type {
 	u32 hardware_id:1;
 	u32 bus_address:1;
 	u32 platform_id:1;
-	u32 reserved:29;
+	u32 backlight:1;
+	u32 reserved:28;
 };
 
 struct acpi_device_pnp {
Index: linux-pm/drivers/acpi/scan.c
===================================================================
--- linux-pm.orig/drivers/acpi/scan.c
+++ linux-pm/drivers/acpi/scan.c
@@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle
 		 * Some devices don't reliably have _HIDs & _CIDs, so add
 		 * synthetic HIDs to make sure drivers can find them.
 		 */
-		if (acpi_is_video_device(handle))
+		if (acpi_is_video_device(handle)) {
 			acpi_add_id(pnp, ACPI_VIDEO_HID);
-		else if (acpi_bay_match(handle))
+			pnp->type.backlight = 1;
+			break;
+		}
+		if (acpi_bay_match(handle))
 			acpi_add_id(pnp, ACPI_BAY_HID);
 		else if (acpi_dock_match(handle))
 			acpi_add_id(pnp, ACPI_DOCK_HID);
Index: linux-pm/drivers/acpi/glue.c
===================================================================
--- linux-pm.orig/drivers/acpi/glue.c
+++ linux-pm/drivers/acpi/glue.c
@@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bu
 }
 
 #define FIND_CHILD_MIN_SCORE	1
-#define FIND_CHILD_MAX_SCORE	2
+#define FIND_CHILD_MID_SCORE	2
+#define FIND_CHILD_MAX_SCORE	3
 
 static int match_any(struct acpi_device *adev, void *not_used)
 {
@@ -96,8 +97,17 @@ static int find_child_checks(struct acpi
 		return -ENODEV;
 
 	status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
-	if (status == AE_NOT_FOUND)
+	if (status == AE_NOT_FOUND) {
+		/*
+		 * Special case: backlight device objects without _STA are
+		 * preferred to other objects with the same _ADR value, because
+		 * it is more likely that they are actually useful.
+		 */
+		if (adev->pnp.type.backlight)
+			return FIND_CHILD_MID_SCORE;
+
 		return FIND_CHILD_MIN_SCORE;
+	}
 
 	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
 		return -ENODEV;
Hans de Goede Jan. 10, 2023, 3:29 p.m. UTC | #3
Hi Rafael,

On 1/10/23 14:33, Rafael J. Wysocki wrote:
> On Monday, January 9, 2023 9:57:21 PM CET Hans de Goede wrote:
>> The Dell Latitude E6430 both with and without the optional NVidia dGPU
>> has a bug in its ACPI tables which is causing Linux to assign the wrong
>> ACPI fwnode / companion to the pci_device for the i915 iGPU.
>>
>> Specifically under the PCI root bridge there are these 2 ACPI Device()s :
>>
>>  Scope (_SB.PCI0)
>>  {
>>      Device (GFX0)
>>      {
>>          Name (_ADR, 0x00020000)  // _ADR: Address
>>      }
>>
>>      ...
>>
>>      Device (VID)
>>      {
>>          Name (_ADR, 0x00020000)  // _ADR: Address
>>          ...
>>
>>          Method (_DOS, 1, NotSerialized)  // _DOS: Disable Output Switching
>>          {
>>              VDP8 = Arg0
>>              VDP1 (One, VDP8)
>>          }
>>
>>          Method (_DOD, 0, NotSerialized)  // _DOD: Display Output Devices
>>          {
>>              ...
>>          }
>>          ...
>>      }
>>  }
>>
>> The non-functional GFX0 ACPI device is a problem, because this gets
>> returned as ACPI companion-device by acpi_find_child_device() for the iGPU.
>>
>> This is a long standing problem and the i915 driver does use the ACPI
>> companion for some things, but works fine without it.
>>
>> However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
>> acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device
>> and that is set on the wrong acpi_device because of the wrong
>> acpi_find_child_device() return. This breaks the ACPI video code, leading
>> to non working backlight control in some cases.
> 
> Interesting.  Sorry for the trouble.

No problem, as mentioned this is actually a long standing issue / bug
in the ACPI tables, it just never surfaced before.

>> Make find_child_checks() return a higher score for children which have
>> pnp-ids set by various scan helpers like acpi_is_video_device(), so
>> that it picks the right companion-device.
> 
> This has a potential of changing the behavior in some cases that are not
> relevant here which is generally risky.
> 
>> An alternative approach would be to directly call acpi_is_video_device()
>> from find_child_checks() but that would be somewhat computationally
>> expensive given that acpi_find_child_device() iterates over all the
>> PCI0 children every time it is called.
> 
> I agree with the above, but my fix would be something like the patch below (not
> really tested, but it builds).

Thanks, I have just given this a spin on my E6430 and I can confirm
it still fixes things.

I'll send out this version (re-using most of the v1 commitmsg) as a v2
right away.

Regards,

Hans





> 
> ---
>  drivers/acpi/glue.c     |   14 ++++++++++++--
>  drivers/acpi/scan.c     |    7 +++++--
>  include/acpi/acpi_bus.h |    3 ++-
>  3 files changed, 19 insertions(+), 5 deletions(-)
> 
> Index: linux-pm/include/acpi/acpi_bus.h
> ===================================================================
> --- linux-pm.orig/include/acpi/acpi_bus.h
> +++ linux-pm/include/acpi/acpi_bus.h
> @@ -230,7 +230,8 @@ struct acpi_pnp_type {
>  	u32 hardware_id:1;
>  	u32 bus_address:1;
>  	u32 platform_id:1;
> -	u32 reserved:29;
> +	u32 backlight:1;
> +	u32 reserved:28;
>  };
>  
>  struct acpi_device_pnp {
> Index: linux-pm/drivers/acpi/scan.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/scan.c
> +++ linux-pm/drivers/acpi/scan.c
> @@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle
>  		 * Some devices don't reliably have _HIDs & _CIDs, so add
>  		 * synthetic HIDs to make sure drivers can find them.
>  		 */
> -		if (acpi_is_video_device(handle))
> +		if (acpi_is_video_device(handle)) {
>  			acpi_add_id(pnp, ACPI_VIDEO_HID);
> -		else if (acpi_bay_match(handle))
> +			pnp->type.backlight = 1;
> +			break;
> +		}
> +		if (acpi_bay_match(handle))
>  			acpi_add_id(pnp, ACPI_BAY_HID);
>  		else if (acpi_dock_match(handle))
>  			acpi_add_id(pnp, ACPI_DOCK_HID);
> Index: linux-pm/drivers/acpi/glue.c
> ===================================================================
> --- linux-pm.orig/drivers/acpi/glue.c
> +++ linux-pm/drivers/acpi/glue.c
> @@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bu
>  }
>  
>  #define FIND_CHILD_MIN_SCORE	1
> -#define FIND_CHILD_MAX_SCORE	2
> +#define FIND_CHILD_MID_SCORE	2
> +#define FIND_CHILD_MAX_SCORE	3
>  
>  static int match_any(struct acpi_device *adev, void *not_used)
>  {
> @@ -96,8 +97,17 @@ static int find_child_checks(struct acpi
>  		return -ENODEV;
>  
>  	status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
> -	if (status == AE_NOT_FOUND)
> +	if (status == AE_NOT_FOUND) {
> +		/*
> +		 * Special case: backlight device objects without _STA are
> +		 * preferred to other objects with the same _ADR value, because
> +		 * it is more likely that they are actually useful.
> +		 */
> +		if (adev->pnp.type.backlight)
> +			return FIND_CHILD_MID_SCORE;
> +
>  		return FIND_CHILD_MIN_SCORE;
> +	}
>  
>  	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
>  		return -ENODEV;
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 204fe94c7e45..2055dfd7678b 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -75,7 +75,7 @@  static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
 }
 
 #define FIND_CHILD_MIN_SCORE	1
-#define FIND_CHILD_MAX_SCORE	2
+#define FIND_CHILD_MAX_SCORE	3
 
 static int match_any(struct acpi_device *adev, void *not_used)
 {
@@ -89,15 +89,25 @@  static bool acpi_dev_has_children(struct acpi_device *adev)
 
 static int find_child_checks(struct acpi_device *adev, bool check_children)
 {
+	int score = FIND_CHILD_MIN_SCORE;
 	unsigned long long sta;
 	acpi_status status;
 
 	if (check_children && !acpi_dev_has_children(adev))
 		return -ENODEV;
 
+	/*
+	 * For devices without a _STA method, prefer devices without a _HID
+	 * (which conflicts with having an _ADR) but which have been matched
+	 * in some other way, like e.g. by acpi_is_video_device() over devices
+	 * with no ids at all.
+	 */
+	if (!adev->pnp.type.platform_id && adev->pnp.type.hardware_id)
+		score++;
+
 	status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
 	if (status == AE_NOT_FOUND)
-		return FIND_CHILD_MIN_SCORE;
+		return score;
 
 	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
 		return -ENODEV;