diff mbox series

[v16,3/3] platform/x86/amd: pmc: Don't let PCIe root ports go into D3

Message ID 20230829171212.156688-4-mario.limonciello@amd.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Avoid PCIe D3 for AMD PCIe root ports | expand

Commit Message

Mario Limonciello Aug. 29, 2023, 5:12 p.m. UTC
commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
changed pci_bridge_d3_possible() so that any vendor's PCIe ports
from modern machines (>=2015) are allowed to be put into D3.

Iain reports that USB devices can't be used to wake a Lenovo Z13
from suspend. This is because the PCIe root port has been put
into D3 and AMD's platform can't handle USB devices waking from
a hardware sleep state in this case.

This problem only occurs on Linux, and only when the AMD PMC driver
is utilized to put the device into a hardware sleep state. Comparing
the behavior on Windows and Linux, Windows doesn't put the root ports
into D3.

A variety of approaches were discussed to change PCI core to handle this
case generically but no consensus was reached. To limit the scope of
effect only to the affected machines introduce a workaround into the
amd-pmc driver to only apply to the PCI root ports in affected machines
when going into hardware sleep.

Link: https://lore.kernel.org/linux-pci/20230818193932.27187-1-mario.limonciello@amd.com/
Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
Reported-by: Iain Lane <iain@orangesquash.org.uk>
Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
v15->v16:
 * Only match PCIe root ports with ACPI companions
 * Use constraints when workaround activated
---
 drivers/platform/x86/amd/pmc/pmc.c | 39 ++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

Comments

Shyam Sundar S K Sept. 5, 2023, 10:08 a.m. UTC | #1
On 8/29/2023 10:42 PM, Mario Limonciello wrote:
> commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
> changed pci_bridge_d3_possible() so that any vendor's PCIe ports
> from modern machines (>=2015) are allowed to be put into D3.
> 
> Iain reports that USB devices can't be used to wake a Lenovo Z13
> from suspend. This is because the PCIe root port has been put
> into D3 and AMD's platform can't handle USB devices waking from
> a hardware sleep state in this case.
> 
> This problem only occurs on Linux, and only when the AMD PMC driver
> is utilized to put the device into a hardware sleep state. Comparing
> the behavior on Windows and Linux, Windows doesn't put the root ports
> into D3.
> 
> A variety of approaches were discussed to change PCI core to handle this
> case generically but no consensus was reached. To limit the scope of
> effect only to the affected machines introduce a workaround into the
> amd-pmc driver to only apply to the PCI root ports in affected machines
> when going into hardware sleep.
> 
> Link: https://lore.kernel.org/linux-pci/20230818193932.27187-1-mario.limonciello@amd.com/
> Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
> Reported-by: Iain Lane <iain@orangesquash.org.uk>
> Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>

See if this change can be moved to pmc-quirks.c, besides that change
looks good to me. Thank you.

Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

> ---
> v15->v16:
>  * Only match PCIe root ports with ACPI companions
>  * Use constraints when workaround activated
> ---
>  drivers/platform/x86/amd/pmc/pmc.c | 39 ++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
> index eb2a4263814c..6a037447ec5a 100644
> --- a/drivers/platform/x86/amd/pmc/pmc.c
> +++ b/drivers/platform/x86/amd/pmc/pmc.c
> @@ -741,6 +741,41 @@ static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
>  	return 0;
>  }
>  
> +/* only allow PCIe root ports with a LPS0 constraint configured to go to D3 */
> +static int amd_pmc_rp_wa(struct amd_pmc_dev *pdev)
> +{
> +	struct pci_dev *pci_dev = NULL;
> +
> +	while ((pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_ANY_ID, pci_dev))) {
> +		struct acpi_device *adev;
> +		int constraint;
> +
> +		if (!pci_is_pcie(pci_dev) ||
> +		    !(pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT))
> +			continue;
> +
> +		if (pci_dev->current_state == PCI_D3hot ||
> +		    pci_dev->current_state == PCI_D3cold)
> +			continue;
> +
> +		adev = ACPI_COMPANION(&pci_dev->dev);
> +		if (!adev)
> +			continue;
> +
> +		constraint = acpi_get_lps0_constraint(adev);
> +		if (constraint != ACPI_STATE_UNKNOWN &&
> +		    constraint >= ACPI_STATE_S3)
> +			continue;
> +
> +		if (pci_dev->bridge_d3 == 0)
> +			continue;
> +		pci_dev->bridge_d3 = 0;
> +		dev_info(&pci_dev->dev, "Disabling D3 on PCIe root port due lack of constraint\n");
> +	}
> +
> +	return 0;
> +}
> +
>  static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
>  {
>  	struct rtc_device *rtc_device;
> @@ -893,6 +928,10 @@ static int amd_pmc_suspend_handler(struct device *dev)
>  	case AMD_CPU_ID_CZN:
>  		rc = amd_pmc_czn_wa_irq1(pdev);
>  		break;
> +	case AMD_CPU_ID_YC:
> +	case AMD_CPU_ID_PS:
> +		rc = amd_pmc_rp_wa(pdev);
> +		break;
>  	default:
>  		break;
>  	}
>
Hans de Goede Sept. 5, 2023, 10:15 a.m. UTC | #2
Hi Shyam,

On 9/5/23 12:08, Shyam Sundar S K wrote:
> 
> 
> On 8/29/2023 10:42 PM, Mario Limonciello wrote:
>> commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
>> changed pci_bridge_d3_possible() so that any vendor's PCIe ports
>> from modern machines (>=2015) are allowed to be put into D3.
>>
>> Iain reports that USB devices can't be used to wake a Lenovo Z13
>> from suspend. This is because the PCIe root port has been put
>> into D3 and AMD's platform can't handle USB devices waking from
>> a hardware sleep state in this case.
>>
>> This problem only occurs on Linux, and only when the AMD PMC driver
>> is utilized to put the device into a hardware sleep state. Comparing
>> the behavior on Windows and Linux, Windows doesn't put the root ports
>> into D3.
>>
>> A variety of approaches were discussed to change PCI core to handle this
>> case generically but no consensus was reached. To limit the scope of
>> effect only to the affected machines introduce a workaround into the
>> amd-pmc driver to only apply to the PCI root ports in affected machines
>> when going into hardware sleep.
>>
>> Link: https://lore.kernel.org/linux-pci/20230818193932.27187-1-mario.limonciello@amd.com/
>> Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
>> Reported-by: Iain Lane <iain@orangesquash.org.uk>
>> Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> 
> See if this change can be moved to pmc-quirks.c, besides that change
> looks good to me. Thank you.
> 
> Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>

Thank you for the review.

I also just replied to this series (to the cover-letter)
with an alternative approach based on making the
XHCI driver call pci_d3cold_disable() on the XHCI
PCIe-device on affected AMD chipsets.

That seems like a cleaner approach to me. I wonder
if you have any remarks about that approach ?

Regards,

Hans


> 
>> ---
>> v15->v16:
>>  * Only match PCIe root ports with ACPI companions
>>  * Use constraints when workaround activated
>> ---
>>  drivers/platform/x86/amd/pmc/pmc.c | 39 ++++++++++++++++++++++++++++++
>>  1 file changed, 39 insertions(+)
>>
>> diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
>> index eb2a4263814c..6a037447ec5a 100644
>> --- a/drivers/platform/x86/amd/pmc/pmc.c
>> +++ b/drivers/platform/x86/amd/pmc/pmc.c
>> @@ -741,6 +741,41 @@ static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
>>  	return 0;
>>  }
>>  
>> +/* only allow PCIe root ports with a LPS0 constraint configured to go to D3 */
>> +static int amd_pmc_rp_wa(struct amd_pmc_dev *pdev)
>> +{
>> +	struct pci_dev *pci_dev = NULL;
>> +
>> +	while ((pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_ANY_ID, pci_dev))) {
>> +		struct acpi_device *adev;
>> +		int constraint;
>> +
>> +		if (!pci_is_pcie(pci_dev) ||
>> +		    !(pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT))
>> +			continue;
>> +
>> +		if (pci_dev->current_state == PCI_D3hot ||
>> +		    pci_dev->current_state == PCI_D3cold)
>> +			continue;
>> +
>> +		adev = ACPI_COMPANION(&pci_dev->dev);
>> +		if (!adev)
>> +			continue;
>> +
>> +		constraint = acpi_get_lps0_constraint(adev);
>> +		if (constraint != ACPI_STATE_UNKNOWN &&
>> +		    constraint >= ACPI_STATE_S3)
>> +			continue;
>> +
>> +		if (pci_dev->bridge_d3 == 0)
>> +			continue;
>> +		pci_dev->bridge_d3 = 0;
>> +		dev_info(&pci_dev->dev, "Disabling D3 on PCIe root port due lack of constraint\n");
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
>>  {
>>  	struct rtc_device *rtc_device;
>> @@ -893,6 +928,10 @@ static int amd_pmc_suspend_handler(struct device *dev)
>>  	case AMD_CPU_ID_CZN:
>>  		rc = amd_pmc_czn_wa_irq1(pdev);
>>  		break;
>> +	case AMD_CPU_ID_YC:
>> +	case AMD_CPU_ID_PS:
>> +		rc = amd_pmc_rp_wa(pdev);
>> +		break;
>>  	default:
>>  		break;
>>  	}
>>
>
Mario Limonciello Sept. 5, 2023, 7:57 p.m. UTC | #3
On 9/5/2023 05:15, Hans de Goede wrote:
> Hi Shyam,
> 
> On 9/5/23 12:08, Shyam Sundar S K wrote:
>>
>>
>> On 8/29/2023 10:42 PM, Mario Limonciello wrote:
>>> commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
>>> changed pci_bridge_d3_possible() so that any vendor's PCIe ports
>>> from modern machines (>=2015) are allowed to be put into D3.
>>>
>>> Iain reports that USB devices can't be used to wake a Lenovo Z13
>>> from suspend. This is because the PCIe root port has been put
>>> into D3 and AMD's platform can't handle USB devices waking from
>>> a hardware sleep state in this case.
>>>
>>> This problem only occurs on Linux, and only when the AMD PMC driver
>>> is utilized to put the device into a hardware sleep state. Comparing
>>> the behavior on Windows and Linux, Windows doesn't put the root ports
>>> into D3.
>>>
>>> A variety of approaches were discussed to change PCI core to handle this
>>> case generically but no consensus was reached. To limit the scope of
>>> effect only to the affected machines introduce a workaround into the
>>> amd-pmc driver to only apply to the PCI root ports in affected machines
>>> when going into hardware sleep.
>>>
>>> Link: https://lore.kernel.org/linux-pci/20230818193932.27187-1-mario.limonciello@amd.com/
>>> Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
>>> Reported-by: Iain Lane <iain@orangesquash.org.uk>
>>> Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121
>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>
>> See if this change can be moved to pmc-quirks.c, besides that change
>> looks good to me. Thank you.
>>
>> Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> 
> Thank you for the review.
> 
> I also just replied to this series (to the cover-letter)
> with an alternative approach based on making the
> XHCI driver call pci_d3cold_disable() on the XHCI
> PCIe-device on affected AMD chipsets.
> 
> That seems like a cleaner approach to me. I wonder
> if you have any remarks about that approach ?
> 

I was thinking more about Hans' comments to the cover letter as well as 
Shyam's comments to move it into pmc-quirks.c.

Perhaps it would better be conveying what's going on by having a 
dedicated step that amd-pmc calls pci_choose_state() for each PCIe 
device and checks the value against the constraints.  If "any" of them 
are mismatched it could emit a message.  This is a little bit of 
duplication though because drivers/acpi/x86/s2idle.c already also emits 
a similar message for some devices when pm_debug_messages is enabled.

Then the special case would be for PCIe root ports that are mismatched 
the driver overrides it.  If this logic change is wouldn't make sense 
for it to be moved into pmc-quirks.c.

I don't think using pci_d3cold_disable() / pci_d3cold_enable() is 
correct though.  If PCI core stays the same it should still be setting 
pdev->bridge_d3 to zero.  The problem isn't with D3cold on the PCIe RP 
at s2didle, it's with D3hot.


> Regards,
> 
> Hans
> 
> 
>>
>>> ---
>>> v15->v16:
>>>   * Only match PCIe root ports with ACPI companions
>>>   * Use constraints when workaround activated
>>> ---
>>>   drivers/platform/x86/amd/pmc/pmc.c | 39 ++++++++++++++++++++++++++++++
>>>   1 file changed, 39 insertions(+)
>>>
>>> diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
>>> index eb2a4263814c..6a037447ec5a 100644
>>> --- a/drivers/platform/x86/amd/pmc/pmc.c
>>> +++ b/drivers/platform/x86/amd/pmc/pmc.c
>>> @@ -741,6 +741,41 @@ static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
>>>   	return 0;
>>>   }
>>>   
>>> +/* only allow PCIe root ports with a LPS0 constraint configured to go to D3 */
>>> +static int amd_pmc_rp_wa(struct amd_pmc_dev *pdev)
>>> +{
>>> +	struct pci_dev *pci_dev = NULL;
>>> +
>>> +	while ((pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_ANY_ID, pci_dev))) {
>>> +		struct acpi_device *adev;
>>> +		int constraint;
>>> +
>>> +		if (!pci_is_pcie(pci_dev) ||
>>> +		    !(pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT))
>>> +			continue;
>>> +
>>> +		if (pci_dev->current_state == PCI_D3hot ||
>>> +		    pci_dev->current_state == PCI_D3cold)
>>> +			continue;
>>> +
>>> +		adev = ACPI_COMPANION(&pci_dev->dev);
>>> +		if (!adev)
>>> +			continue;
>>> +
>>> +		constraint = acpi_get_lps0_constraint(adev);
>>> +		if (constraint != ACPI_STATE_UNKNOWN &&
>>> +		    constraint >= ACPI_STATE_S3)
>>> +			continue;
>>> +
>>> +		if (pci_dev->bridge_d3 == 0)
>>> +			continue;
>>> +		pci_dev->bridge_d3 = 0;
>>> +		dev_info(&pci_dev->dev, "Disabling D3 on PCIe root port due lack of constraint\n");
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>   static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
>>>   {
>>>   	struct rtc_device *rtc_device;
>>> @@ -893,6 +928,10 @@ static int amd_pmc_suspend_handler(struct device *dev)
>>>   	case AMD_CPU_ID_CZN:
>>>   		rc = amd_pmc_czn_wa_irq1(pdev);
>>>   		break;
>>> +	case AMD_CPU_ID_YC:
>>> +	case AMD_CPU_ID_PS:
>>> +		rc = amd_pmc_rp_wa(pdev);
>>> +		break;
>>>   	default:
>>>   		break;
>>>   	}
>>>
>>
>
Rafael J. Wysocki Sept. 5, 2023, 8:21 p.m. UTC | #4
On Tue, Sep 5, 2023 at 9:57 PM Mario Limonciello
<mario.limonciello@amd.com> wrote:
>
> On 9/5/2023 05:15, Hans de Goede wrote:
> > Hi Shyam,
> >
> > On 9/5/23 12:08, Shyam Sundar S K wrote:
> >>
> >>
> >> On 8/29/2023 10:42 PM, Mario Limonciello wrote:
> >>> commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
> >>> changed pci_bridge_d3_possible() so that any vendor's PCIe ports
> >>> from modern machines (>=2015) are allowed to be put into D3.
> >>>
> >>> Iain reports that USB devices can't be used to wake a Lenovo Z13
> >>> from suspend. This is because the PCIe root port has been put
> >>> into D3 and AMD's platform can't handle USB devices waking from
> >>> a hardware sleep state in this case.
> >>>
> >>> This problem only occurs on Linux, and only when the AMD PMC driver
> >>> is utilized to put the device into a hardware sleep state. Comparing
> >>> the behavior on Windows and Linux, Windows doesn't put the root ports
> >>> into D3.
> >>>
> >>> A variety of approaches were discussed to change PCI core to handle this
> >>> case generically but no consensus was reached. To limit the scope of
> >>> effect only to the affected machines introduce a workaround into the
> >>> amd-pmc driver to only apply to the PCI root ports in affected machines
> >>> when going into hardware sleep.
> >>>
> >>> Link: https://lore.kernel.org/linux-pci/20230818193932.27187-1-mario.limonciello@amd.com/
> >>> Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
> >>> Reported-by: Iain Lane <iain@orangesquash.org.uk>
> >>> Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121
> >>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> >>
> >> See if this change can be moved to pmc-quirks.c, besides that change
> >> looks good to me. Thank you.
> >>
> >> Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> >
> > Thank you for the review.
> >
> > I also just replied to this series (to the cover-letter)
> > with an alternative approach based on making the
> > XHCI driver call pci_d3cold_disable() on the XHCI
> > PCIe-device on affected AMD chipsets.
> >
> > That seems like a cleaner approach to me. I wonder
> > if you have any remarks about that approach ?
> >
>
> I was thinking more about Hans' comments to the cover letter as well as
> Shyam's comments to move it into pmc-quirks.c.
>
> Perhaps it would better be conveying what's going on by having a
> dedicated step that amd-pmc calls pci_choose_state() for each PCIe
> device and checks the value against the constraints.  If "any" of them
> are mismatched it could emit a message.  This is a little bit of
> duplication though because drivers/acpi/x86/s2idle.c already also emits
> a similar message for some devices when pm_debug_messages is enabled.
>
> Then the special case would be for PCIe root ports that are mismatched
> the driver overrides it.  If this logic change is wouldn't make sense
> for it to be moved into pmc-quirks.c.
>
> I don't think using pci_d3cold_disable() / pci_d3cold_enable() is
> correct though.  If PCI core stays the same it should still be setting
> pdev->bridge_d3 to zero.  The problem isn't with D3cold on the PCIe RP
> at s2didle, it's with D3hot.

Well, it's not even that.

If there were no devices expected to wake up the system from sleep
under the given Root Port, it might very well go into D3hot IIUC, so
the wakeup capability seems to be the key property here.

I need to think a bit more about this.
Bjorn Helgaas Sept. 5, 2023, 8:51 p.m. UTC | #5
[+cc Hans]

On Tue, Aug 29, 2023 at 12:12:12PM -0500, Mario Limonciello wrote:
> commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
> changed pci_bridge_d3_possible() so that any vendor's PCIe ports
> from modern machines (>=2015) are allowed to be put into D3.
> 
> Iain reports that USB devices can't be used to wake a Lenovo Z13
> from suspend. This is because the PCIe root port has been put
> into D3 and AMD's platform can't handle USB devices waking from
> a hardware sleep state in this case.

Can you be specific in the subject and commit log about whether "D3"
refers to "D3hot", "D3cold", or both?  It's probably obvious to PM
folks, but it's always a stumbling block for me.

I assume "can't handle USB devices waking" does not refer to a problem
with the USB adapter and whatever mechanism it uses to send a wakeup
event to request that power be turned on, but rather it means that the
wakeup event doesn't get propagated through the Root Port?

Is this actually specific to USB devices?  Or could a NIC below the
Root Port suffer the same problem when a wake-on-lan packet causes it
to send a wakeup event?  It seems like we've had this conversation
before; sorry to ask the same questions again.

If it's not specific to USB, I would say something like "when the Root
Port is in D3cold, wakeup events from devices below it are lost" (or
whatever the actual problem is).

> This problem only occurs on Linux, and only when the AMD PMC driver
> is utilized to put the device into a hardware sleep state.

Is the AMD PMC driver doing something magic that can't be done via
other power management paths?  That's what "only when the AMD PMC
driver is utilized" suggests.  But if the problem occurs when the Root
Port is put into D3cold via *any* means, just say that.

And if you can say a specific PCI power state instead of "hardware
sleep state", that would be good, too.

> Comparing
> the behavior on Windows and Linux, Windows doesn't put the root ports
> into D3.
> 
> A variety of approaches were discussed to change PCI core to handle this
> case generically but no consensus was reached. To limit the scope of
> effect only to the affected machines introduce a workaround into the
> amd-pmc driver to only apply to the PCI root ports in affected machines
> when going into hardware sleep.

> +/* only allow PCIe root ports with a LPS0 constraint configured to go to D3 */
> +static int amd_pmc_rp_wa(struct amd_pmc_dev *pdev)
> +{
> +	struct pci_dev *pci_dev = NULL;
> +
> +	while ((pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_ANY_ID, pci_dev))) {

I hate to add more uses of pci_get_device() because it doesn't account
for hot-added devices.  Maybe there's no need to support hot-add of
AMD Root Ports, but that's not obvious to readers here.

One mechanism to avoid pci_get_device() is to use quirks, although it
might be hard to deal with PCI/ACPI ordering issues.

> +		struct acpi_device *adev;
> +		int constraint;
> +
> +		if (!pci_is_pcie(pci_dev) ||
> +		    !(pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT))
> +			continue;
> +
> +		if (pci_dev->current_state == PCI_D3hot ||
> +		    pci_dev->current_state == PCI_D3cold)
> +			continue;

If we're trying to determine a property of the device, why does the
current power state make a difference?

It looks like this loop runs every time we suspend (from
amd_pmc_suspend_handler()), even though this is something we should
know at boot-time, so we only need it once.

> +		adev = ACPI_COMPANION(&pci_dev->dev);
> +		if (!adev)
> +			continue;
> +
> +		constraint = acpi_get_lps0_constraint(adev);
> +		if (constraint != ACPI_STATE_UNKNOWN &&
> +		    constraint >= ACPI_STATE_S3)
> +			continue;
> +
> +		if (pci_dev->bridge_d3 == 0)
> +			continue;
> +		pci_dev->bridge_d3 = 0;
> +		dev_info(&pci_dev->dev, "Disabling D3 on PCIe root port due lack of constraint\n");

D3hot?  D3cold?  Both?  "lack of constraint"?

> +	}
> +
> +	return 0;
> +}
> +
>  static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
>  {
>  	struct rtc_device *rtc_device;
> @@ -893,6 +928,10 @@ static int amd_pmc_suspend_handler(struct device *dev)
>  	case AMD_CPU_ID_CZN:
>  		rc = amd_pmc_czn_wa_irq1(pdev);
>  		break;
> +	case AMD_CPU_ID_YC:
> +	case AMD_CPU_ID_PS:
> +		rc = amd_pmc_rp_wa(pdev);
> +		break;
>  	default:
>  		break;
>  	}
> -- 
> 2.34.1
>
Mario Limonciello Sept. 5, 2023, 10:16 p.m. UTC | #6
On 9/5/2023 15:51, Bjorn Helgaas wrote:
> [+cc Hans]
> 
> On Tue, Aug 29, 2023 at 12:12:12PM -0500, Mario Limonciello wrote:
>> commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend")
>> changed pci_bridge_d3_possible() so that any vendor's PCIe ports
>> from modern machines (>=2015) are allowed to be put into D3.
>>
>> Iain reports that USB devices can't be used to wake a Lenovo Z13
>> from suspend. This is because the PCIe root port has been put
>> into D3 and AMD's platform can't handle USB devices waking from
>> a hardware sleep state in this case.
> 
> Can you be specific in the subject and commit log about whether "D3"
> refers to "D3hot", "D3cold", or both?  It's probably obvious to PM
> folks, but it's always a stumbling block for me.
> 
> I assume "can't handle USB devices waking" does not refer to a problem
> with the USB adapter and whatever mechanism it uses to send a wakeup
> event to request that power be turned on, but rather it means that the
> wakeup event doesn't get propagated through the Root Port?
> 
> Is this actually specific to USB devices?  Or could a NIC below the
> Root Port suffer the same problem when a wake-on-lan packet causes it
> to send a wakeup event?  It seems like we've had this conversation
> before; sorry to ask the same questions again.
> 
> If it's not specific to USB, I would say something like "when the Root
> Port is in D3cold, wakeup events from devices below it are lost" (or
> whatever the actual problem is).

The problem is specific to the root port in D3hot over s2idle after the 
hardware has entered the deepest state.

It's fine any other time.

This particular root port only connects to the XHCI controllers and USB4 
controllers, so I can't confirm whether anything else is affected.

> 
>> This problem only occurs on Linux, and only when the AMD PMC driver
>> is utilized to put the device into a hardware sleep state.
> 
> Is the AMD PMC driver doing something magic that can't be done via
> other power management paths?  That's what "only when the AMD PMC
> driver is utilized" suggests.  But if the problem occurs when the Root
> Port is put into D3cold via *any* means, just say that.
> 
> And if you can say a specific PCI power state instead of "hardware
> sleep state", that would be good, too.

Yes; the AMD PMC driver does a notification to the platform that the OS 
is ready for it to go into a hardware sleep state [1].

If the AMD PMC driver isn't used, the platform is not notified that the 
OS is ready for it to go into hardware sleep state, and this issue will 
not occur.

So the PCI root port being in D3 while the hardware is in a sleep state 
is very accurate.

[1] 
https://github.com/torvalds/linux/blob/v6.5/drivers/platform/x86/amd/pmc.c#L816

> 
>> Comparing
>> the behavior on Windows and Linux, Windows doesn't put the root ports
>> into D3.
>>
>> A variety of approaches were discussed to change PCI core to handle this
>> case generically but no consensus was reached. To limit the scope of
>> effect only to the affected machines introduce a workaround into the
>> amd-pmc driver to only apply to the PCI root ports in affected machines
>> when going into hardware sleep.
> 
>> +/* only allow PCIe root ports with a LPS0 constraint configured to go to D3 */
>> +static int amd_pmc_rp_wa(struct amd_pmc_dev *pdev)
>> +{
>> +	struct pci_dev *pci_dev = NULL;
>> +
>> +	while ((pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_ANY_ID, pci_dev))) {
> 
> I hate to add more uses of pci_get_device() because it doesn't account
> for hot-added devices.  Maybe there's no need to support hot-add of
> AMD Root Ports, but that's not obvious to readers here.

This function is only called during suspend, so it should cover hot 
added / hot removed devices.

If this ends up staying for v17 as is I'll add more verbose comments.

> 
> One mechanism to avoid pci_get_device() is to use quirks, although it
> might be hard to deal with PCI/ACPI ordering issues

I did quirks in an earlier version of this series, but you had feedback 
that the solution isn't scalable.  That's why it's morphed into this 
approach, which I'd like to think is more scalable as it looks at the 
constraints advertised by the platform in an AMD specific driver.

> 
>> +		struct acpi_device *adev;
>> +		int constraint;
>> +
>> +		if (!pci_is_pcie(pci_dev) ||
>> +		    !(pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT))
>> +			continue;
>> +
>> +		if (pci_dev->current_state == PCI_D3hot ||
>> +		    pci_dev->current_state == PCI_D3cold)
>> +			continue;
> 
> If we're trying to determine a property of the device, why does the
> current power state make a difference?

Hans left feedback in v15 that if the device was already in D3 at the 
time of this function it wouldn't work properly.  So I excluded those 
devices.

> 
> It looks like this loop runs every time we suspend (from
> amd_pmc_suspend_handler()), even though this is something we should
> know at boot-time, so we only need it once.

It's was because pci_bridge_d3_update() can be called and change it 
again in other places.

I think if we want to optimize it to only run a single time we need a 
new variable or bit in the pci_dev structure that can be used to mark 
such an exclusion which pci_bridge_d3_update() could take into account.

This could fit in well with Hans' idea of drivers could register a 
callback to "veto" D3 support.  It could be something like 
pci_bridge_d3_update() is called whenever a new driver 
registers/unregisters the callback.  It might also fit in well with your 
previous comments about how you want to separate "spec compliant" things 
and "quirk" things in pci_bridge_d3_possible().

Could you comment on that?  He suggested it in the cover letter responses.

> 
>> +		adev = ACPI_COMPANION(&pci_dev->dev);
>> +		if (!adev)
>> +			continue;
>> +
>> +		constraint = acpi_get_lps0_constraint(adev);
>> +		if (constraint != ACPI_STATE_UNKNOWN &&
>> +		    constraint >= ACPI_STATE_S3)
>> +			continue;
>> +
>> +		if (pci_dev->bridge_d3 == 0)
>> +			continue;
>> +		pci_dev->bridge_d3 = 0;
>> +		dev_info(&pci_dev->dev, "Disabling D3 on PCIe root port due lack of constraint\n");
> 
> D3hot?  D3cold?  Both?  "lack of constraint"?

It's disabling both, which is why I left it as D3 to cover both.  The 
lack of constraint can't be explained in a single line message.  If this 
is too noisy for a user and you think would cause more questions than 
help I'm fine to downgrade it to debug.

> 
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
>>   {
>>   	struct rtc_device *rtc_device;
>> @@ -893,6 +928,10 @@ static int amd_pmc_suspend_handler(struct device *dev)
>>   	case AMD_CPU_ID_CZN:
>>   		rc = amd_pmc_czn_wa_irq1(pdev);
>>   		break;
>> +	case AMD_CPU_ID_YC:
>> +	case AMD_CPU_ID_PS:
>> +		rc = amd_pmc_rp_wa(pdev);
>> +		break;
>>   	default:
>>   		break;
>>   	}
>> -- 
>> 2.34.1
>>
diff mbox series

Patch

diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
index eb2a4263814c..6a037447ec5a 100644
--- a/drivers/platform/x86/amd/pmc/pmc.c
+++ b/drivers/platform/x86/amd/pmc/pmc.c
@@ -741,6 +741,41 @@  static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
 	return 0;
 }
 
+/* only allow PCIe root ports with a LPS0 constraint configured to go to D3 */
+static int amd_pmc_rp_wa(struct amd_pmc_dev *pdev)
+{
+	struct pci_dev *pci_dev = NULL;
+
+	while ((pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_ANY_ID, pci_dev))) {
+		struct acpi_device *adev;
+		int constraint;
+
+		if (!pci_is_pcie(pci_dev) ||
+		    !(pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT))
+			continue;
+
+		if (pci_dev->current_state == PCI_D3hot ||
+		    pci_dev->current_state == PCI_D3cold)
+			continue;
+
+		adev = ACPI_COMPANION(&pci_dev->dev);
+		if (!adev)
+			continue;
+
+		constraint = acpi_get_lps0_constraint(adev);
+		if (constraint != ACPI_STATE_UNKNOWN &&
+		    constraint >= ACPI_STATE_S3)
+			continue;
+
+		if (pci_dev->bridge_d3 == 0)
+			continue;
+		pci_dev->bridge_d3 = 0;
+		dev_info(&pci_dev->dev, "Disabling D3 on PCIe root port due lack of constraint\n");
+	}
+
+	return 0;
+}
+
 static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
 {
 	struct rtc_device *rtc_device;
@@ -893,6 +928,10 @@  static int amd_pmc_suspend_handler(struct device *dev)
 	case AMD_CPU_ID_CZN:
 		rc = amd_pmc_czn_wa_irq1(pdev);
 		break;
+	case AMD_CPU_ID_YC:
+	case AMD_CPU_ID_PS:
+		rc = amd_pmc_rp_wa(pdev);
+		break;
 	default:
 		break;
 	}