diff mbox series

[V1] PCI/ASPM: Update saved buffers with latest ASPM configuration

Message ID 20230125133830.20620-1-vidyas@nvidia.com (mailing list archive)
State Changes Requested
Delegated to: Bjorn Helgaas
Headers show
Series [V1] PCI/ASPM: Update saved buffers with latest ASPM configuration | expand

Commit Message

Vidya Sagar Jan. 25, 2023, 1:38 p.m. UTC
Many PCIe device drivers save the configuration state of their respective
devices during probe and restore the same when their 'slot_reset' hook
is called through PCIe Error Recovery System.
If the system has a change in ASPM policy after the driver's probe is
called and before error event occurred, 'slot_reset' hook restores the
PCIe configuration state to what it was at the time of probe but not with
what it was just before the occurrence of the error event.
This effectively leads to a mismatch in the ASPM configuration between
the device and its upstream parent device.
This patch addresses that issue by updating the saved configuration state
of the device with the latest info whenever there is a change w.r.t ASPM
policy.

Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
---
 drivers/pci/pci.h       |  4 ++++
 drivers/pci/pcie/aspm.c | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

Comments

Wysocki, Rafael J Jan. 25, 2023, 3:01 p.m. UTC | #1
On 1/25/2023 2:38 PM, Vidya Sagar wrote:
> Many PCIe device drivers save the configuration state of their respective
> devices during probe and restore the same when their 'slot_reset' hook
> is called through PCIe Error Recovery System.
> If the system has a change in ASPM policy after the driver's probe is
> called and before error event occurred, 'slot_reset' hook restores the
> PCIe configuration state to what it was at the time of probe but not with
> what it was just before the occurrence of the error event.
> This effectively leads to a mismatch in the ASPM configuration between
> the device and its upstream parent device.
> This patch addresses that issue by updating the saved configuration state
> of the device with the latest info whenever there is a change w.r.t ASPM
> policy.
>
> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>

If it is a bug fix (which I think it is), a Fixes tag should be present 
here.

If the reporter's names are known, Reported-by tags should be present 
here too.

If anyone except for you has tested this patch, a Tested-by tag should 
be present here.

> ---
>   drivers/pci/pci.h       |  4 ++++
>   drivers/pci/pcie/aspm.c | 40 ++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 44 insertions(+)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 9ed3b5550043..f4a91d4fe96d 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -566,12 +566,16 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
>   void pcie_aspm_init_link_state(struct pci_dev *pdev);
>   void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>   void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> +void pci_save_aspm_state(struct pci_dev *dev);
> +void pci_restore_aspm_state(struct pci_dev *dev);
>   void pci_save_aspm_l1ss_state(struct pci_dev *dev);
>   void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
>   #else
>   static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>   static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
>   static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> +static inline void pci_save_aspm_state(struct pci_dev *dev) { }
> +static inline void pci_restore_aspm_state(struct pci_dev *dev) { }
>   static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { }
>   static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { }
>   #endif
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index 53a1fa306e1e..f25e0440d36b 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -151,6 +151,7 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable)
>   						   PCI_EXP_LNKCTL_CLKREQ_EN,
>   						   val);
>   	link->clkpm_enabled = !!enable;
> +	pci_save_aspm_state(child);
>   }
>   
>   static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
> @@ -757,6 +758,39 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
>   				PCI_L1SS_CTL1_L1SS_MASK, val);
>   }
>   
> +void pci_save_aspm_state(struct pci_dev *dev)
> +{
> +	int i = 0;
> +	struct pci_cap_saved_state *save_state;
> +	u16 *cap;
> +
> +	if (!pci_is_pcie(dev))
> +		return;
> +
> +	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
> +	if (!save_state)
> +		return;
> +
> +	cap = (u16 *)&save_state->cap.data[0];
> +	i++;
> +	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
> +}
> +
> +void pci_restore_aspm_state(struct pci_dev *dev)
> +{
> +	int i = 0;
> +	struct pci_cap_saved_state *save_state;
> +	u16 *cap;
> +
> +	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
> +	if (!save_state)
> +		return;
> +
> +	cap = (u16 *)&save_state->cap.data[0];
> +	i++;
> +	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
> +}
> +
>   void pci_save_aspm_l1ss_state(struct pci_dev *dev)
>   {
>   	struct pci_cap_saved_state *save_state;
> @@ -849,6 +883,12 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
>   		pcie_config_aspm_dev(parent, upstream);
>   
>   	link->aspm_enabled = state;
> +
> +	/* Update latest ASPM configuration in saved context */
> +	pci_save_aspm_state(link->downstream);
> +	pci_save_aspm_l1ss_state(link->downstream);
> +	pci_save_aspm_state(parent);
> +	pci_save_aspm_l1ss_state(parent);
>   }
>   
>   static void pcie_config_aspm_path(struct pcie_link_state *link)
Vidya Sagar Jan. 25, 2023, 5:22 p.m. UTC | #2
On 1/25/2023 8:31 PM, Wysocki, Rafael J wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 1/25/2023 2:38 PM, Vidya Sagar wrote:
>> Many PCIe device drivers save the configuration state of their respective
>> devices during probe and restore the same when their 'slot_reset' hook
>> is called through PCIe Error Recovery System.
>> If the system has a change in ASPM policy after the driver's probe is
>> called and before error event occurred, 'slot_reset' hook restores the
>> PCIe configuration state to what it was at the time of probe but not with
>> what it was just before the occurrence of the error event.
>> This effectively leads to a mismatch in the ASPM configuration between
>> the device and its upstream parent device.
>> This patch addresses that issue by updating the saved configuration state
>> of the device with the latest info whenever there is a change w.r.t ASPM
>> policy.
>>
>> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> 
> If it is a bug fix (which I think it is), a Fixes tag should be present
> here.

It is kind of a bug fix, but I couldn't pinpoint any particular 
commit that would have introduced it.

> 
> If the reporter's names are known, Reported-by tags should be present
> here too.

I was experimenting with the error handling code and happened to find this.

> 
> If anyone except for you has tested this patch, a Tested-by tag should
> be present here.

Only I have tested this patch so far. It would be great if more verification 
were done on this patch.

Thanks,
Vidya Sagar

> 
>> ---
>>   drivers/pci/pci.h       |  4 ++++
>>   drivers/pci/pcie/aspm.c | 40 ++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 44 insertions(+)
>>
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index 9ed3b5550043..f4a91d4fe96d 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -566,12 +566,16 @@ bool pcie_wait_for_link(struct pci_dev *pdev, 
>> bool active);
>>   void pcie_aspm_init_link_state(struct pci_dev *pdev);
>>   void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>>   void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
>> +void pci_save_aspm_state(struct pci_dev *dev);
>> +void pci_restore_aspm_state(struct pci_dev *dev);
>>   void pci_save_aspm_l1ss_state(struct pci_dev *dev);
>>   void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
>>   #else
>>   static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>>   static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
>>   static inline void pcie_aspm_powersave_config_link(struct pci_dev 
>> *pdev) { }
>> +static inline void pci_save_aspm_state(struct pci_dev *dev) { }
>> +static inline void pci_restore_aspm_state(struct pci_dev *dev) { }
>>   static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { }
>>   static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { }
>>   #endif
>> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
>> index 53a1fa306e1e..f25e0440d36b 100644
>> --- a/drivers/pci/pcie/aspm.c
>> +++ b/drivers/pci/pcie/aspm.c
>> @@ -151,6 +151,7 @@ static void pcie_set_clkpm_nocheck(struct 
>> pcie_link_state *link, int enable)
>>                                                  
>> PCI_EXP_LNKCTL_CLKREQ_EN,
>>                                                  val);
>>       link->clkpm_enabled = !!enable;
>> +     pci_save_aspm_state(child);
>>   }
>>
>>   static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
>> @@ -757,6 +758,39 @@ static void pcie_config_aspm_l1ss(struct 
>> pcie_link_state *link, u32 state)
>>                               PCI_L1SS_CTL1_L1SS_MASK, val);
>>   }
>>
>> +void pci_save_aspm_state(struct pci_dev *dev)
>> +{
>> +     int i = 0;
>> +     struct pci_cap_saved_state *save_state;
>> +     u16 *cap;
>> +
>> +     if (!pci_is_pcie(dev))
>> +             return;
>> +
>> +     save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
>> +     if (!save_state)
>> +             return;
>> +
>> +     cap = (u16 *)&save_state->cap.data[0];
>> +     i++;
>> +     pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
>> +}
>> +
>> +void pci_restore_aspm_state(struct pci_dev *dev)
>> +{
>> +     int i = 0;
>> +     struct pci_cap_saved_state *save_state;
>> +     u16 *cap;
>> +
>> +     save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
>> +     if (!save_state)
>> +             return;
>> +
>> +     cap = (u16 *)&save_state->cap.data[0];
>> +     i++;
>> +     pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
>> +}
>> +
>>   void pci_save_aspm_l1ss_state(struct pci_dev *dev)
>>   {
>>       struct pci_cap_saved_state *save_state;
>> @@ -849,6 +883,12 @@ static void pcie_config_aspm_link(struct 
>> pcie_link_state *link, u32 state)
>>               pcie_config_aspm_dev(parent, upstream);
>>
>>       link->aspm_enabled = state;
>> +
>> +     /* Update latest ASPM configuration in saved context */
>> +     pci_save_aspm_state(link->downstream);
>> +     pci_save_aspm_l1ss_state(link->downstream);
>> +     pci_save_aspm_state(parent);
>> +     pci_save_aspm_l1ss_state(parent);
>>   }
>>
>>   static void pcie_config_aspm_path(struct pcie_link_state *link)
Wysocki, Rafael J Jan. 26, 2023, 11:43 a.m. UTC | #3
On 1/25/2023 6:22 PM, Vidya Sagar wrote:
>
>
> On 1/25/2023 8:31 PM, Wysocki, Rafael J wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 1/25/2023 2:38 PM, Vidya Sagar wrote:
>>> Many PCIe device drivers save the configuration state of their 
>>> respective
>>> devices during probe and restore the same when their 'slot_reset' hook
>>> is called through PCIe Error Recovery System.
>>> If the system has a change in ASPM policy after the driver's probe is
>>> called and before error event occurred, 'slot_reset' hook restores the
>>> PCIe configuration state to what it was at the time of probe but not 
>>> with
>>> what it was just before the occurrence of the error event.
>>> This effectively leads to a mismatch in the ASPM configuration between
>>> the device and its upstream parent device.
>>> This patch addresses that issue by updating the saved configuration 
>>> state
>>> of the device with the latest info whenever there is a change w.r.t 
>>> ASPM
>>> policy.
>>>
>>> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
>>
>> If it is a bug fix (which I think it is), a Fixes tag should be present
>> here.
>
> It is kind of a bug fix but I couldn't pin point to any particular 
> commit that would have introduced it.
>
>>
>> If the reporter's names are known, Reported-by tags should be present
>> here too.
>
> I was experimenting with the error handling code and happen to find this.
>
>>
>> If anyone except for you has tested this patch, a Tested-by tag should
>> be present here.
>
> Only I tested this patch for now. It would be great if more 
> verification is done on this patch.
>
Fair enough.

Please feel free to add

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

to this patch.


>
>>
>>> ---
>>>   drivers/pci/pci.h       |  4 ++++
>>>   drivers/pci/pcie/aspm.c | 40 ++++++++++++++++++++++++++++++++++++++++
>>>   2 files changed, 44 insertions(+)
>>>
>>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>>> index 9ed3b5550043..f4a91d4fe96d 100644
>>> --- a/drivers/pci/pci.h
>>> +++ b/drivers/pci/pci.h
>>> @@ -566,12 +566,16 @@ bool pcie_wait_for_link(struct pci_dev *pdev, 
>>> bool active);
>>>   void pcie_aspm_init_link_state(struct pci_dev *pdev);
>>>   void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>>>   void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
>>> +void pci_save_aspm_state(struct pci_dev *dev);
>>> +void pci_restore_aspm_state(struct pci_dev *dev);
>>>   void pci_save_aspm_l1ss_state(struct pci_dev *dev);
>>>   void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
>>>   #else
>>>   static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) 
>>> { }
>>>   static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) 
>>> { }
>>>   static inline void pcie_aspm_powersave_config_link(struct pci_dev 
>>> *pdev) { }
>>> +static inline void pci_save_aspm_state(struct pci_dev *dev) { }
>>> +static inline void pci_restore_aspm_state(struct pci_dev *dev) { }
>>>   static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { }
>>>   static inline void pci_restore_aspm_l1ss_state(struct pci_dev 
>>> *dev) { }
>>>   #endif
>>> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
>>> index 53a1fa306e1e..f25e0440d36b 100644
>>> --- a/drivers/pci/pcie/aspm.c
>>> +++ b/drivers/pci/pcie/aspm.c
>>> @@ -151,6 +151,7 @@ static void pcie_set_clkpm_nocheck(struct 
>>> pcie_link_state *link, int enable)
>>> PCI_EXP_LNKCTL_CLKREQ_EN,
>>>                                                  val);
>>>       link->clkpm_enabled = !!enable;
>>> +     pci_save_aspm_state(child);
>>>   }
>>>
>>>   static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
>>> @@ -757,6 +758,39 @@ static void pcie_config_aspm_l1ss(struct 
>>> pcie_link_state *link, u32 state)
>>>                               PCI_L1SS_CTL1_L1SS_MASK, val);
>>>   }
>>>
>>> +void pci_save_aspm_state(struct pci_dev *dev)
>>> +{
>>> +     int i = 0;
>>> +     struct pci_cap_saved_state *save_state;
>>> +     u16 *cap;
>>> +
>>> +     if (!pci_is_pcie(dev))
>>> +             return;
>>> +
>>> +     save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
>>> +     if (!save_state)
>>> +             return;
>>> +
>>> +     cap = (u16 *)&save_state->cap.data[0];
>>> +     i++;
>>> +     pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
>>> +}
>>> +
>>> +void pci_restore_aspm_state(struct pci_dev *dev)
>>> +{
>>> +     int i = 0;
>>> +     struct pci_cap_saved_state *save_state;
>>> +     u16 *cap;
>>> +
>>> +     save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
>>> +     if (!save_state)
>>> +             return;
>>> +
>>> +     cap = (u16 *)&save_state->cap.data[0];
>>> +     i++;
>>> +     pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
>>> +}
>>> +
>>>   void pci_save_aspm_l1ss_state(struct pci_dev *dev)
>>>   {
>>>       struct pci_cap_saved_state *save_state;
>>> @@ -849,6 +883,12 @@ static void pcie_config_aspm_link(struct 
>>> pcie_link_state *link, u32 state)
>>>               pcie_config_aspm_dev(parent, upstream);
>>>
>>>       link->aspm_enabled = state;
>>> +
>>> +     /* Update latest ASPM configuration in saved context */
>>> +     pci_save_aspm_state(link->downstream);
>>> +     pci_save_aspm_l1ss_state(link->downstream);
>>> +     pci_save_aspm_state(parent);
>>> +     pci_save_aspm_l1ss_state(parent);
>>>   }
>>>
>>>   static void pcie_config_aspm_path(struct pcie_link_state *link)
Kuppuswamy Sathyanarayanan Jan. 26, 2023, 9:22 p.m. UTC | #4
Hi,

On 1/25/23 5:38 AM, Vidya Sagar wrote:
> Many PCIe device drivers save the configuration state of their respective
> devices during probe and restore the same when their 'slot_reset' hook
> is called through PCIe Error Recovery System.
> If the system has a change in ASPM policy after the driver's probe is
> called and before error event occurred, 'slot_reset' hook restores the
> PCIe configuration state to what it was at the time of probe but not with
> what it was just before the occurrence of the error event.
> This effectively leads to a mismatch in the ASPM configuration between
> the device and its upstream parent device.
> This patch addresses that issue by updating the saved configuration state
> of the device with the latest info whenever there is a change w.r.t ASPM
> policy.

Do we need two separate save/restore calls for ASPM state? Is it not possible
to extend pci_save_aspm_l1ss_state() to meet your need?

> 
> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> ---
>  drivers/pci/pci.h       |  4 ++++
>  drivers/pci/pcie/aspm.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 9ed3b5550043..f4a91d4fe96d 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -566,12 +566,16 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
>  void pcie_aspm_init_link_state(struct pci_dev *pdev);
>  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> +void pci_save_aspm_state(struct pci_dev *dev);
> +void pci_restore_aspm_state(struct pci_dev *dev);
>  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
>  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
>  #else
>  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> +static inline void pci_save_aspm_state(struct pci_dev *dev) { }
> +static inline void pci_restore_aspm_state(struct pci_dev *dev) { }
>  static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { }
>  static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { }
>  #endif
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index 53a1fa306e1e..f25e0440d36b 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -151,6 +151,7 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable)
>  						   PCI_EXP_LNKCTL_CLKREQ_EN,
>  						   val);
>  	link->clkpm_enabled = !!enable;
> +	pci_save_aspm_state(child);

Add some details about this change to the commit log. Currently, you have talked only
about the ASPM policy change issue.

>  }
>  
>  static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
> @@ -757,6 +758,39 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
>  				PCI_L1SS_CTL1_L1SS_MASK, val);
>  }
>  
> +void pci_save_aspm_state(struct pci_dev *dev)
> +{
> +	int i = 0;
> +	struct pci_cap_saved_state *save_state;
> +	u16 *cap;
> +
> +	if (!pci_is_pcie(dev))
> +		return;
> +
> +	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
> +	if (!save_state)
> +		return;
> +
> +	cap = (u16 *)&save_state->cap.data[0];
> +	i++;
> +	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
> +}
> +
> +void pci_restore_aspm_state(struct pci_dev *dev)
> +{
> +	int i = 0;
> +	struct pci_cap_saved_state *save_state;
> +	u16 *cap;
> +
> +	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
> +	if (!save_state)
> +		return;
> +
> +	cap = (u16 *)&save_state->cap.data[0];
> +	i++;
> +	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
> +}
> +

Don't you need to add this restore call in pci_restore_state()?

>  void pci_save_aspm_l1ss_state(struct pci_dev *dev)
>  {
>  	struct pci_cap_saved_state *save_state;
> @@ -849,6 +883,12 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
>  		pcie_config_aspm_dev(parent, upstream);
>  
>  	link->aspm_enabled = state;
> +
> +	/* Update latest ASPM configuration in saved context */
> +	pci_save_aspm_state(link->downstream);
> +	pci_save_aspm_l1ss_state(link->downstream);
> +	pci_save_aspm_state(parent);
> +	pci_save_aspm_l1ss_state(parent);
>  }
>  
>  static void pcie_config_aspm_path(struct pcie_link_state *link)
Bjorn Helgaas Jan. 27, 2023, 8:10 p.m. UTC | #5
On Wed, Jan 25, 2023 at 07:08:30PM +0530, Vidya Sagar wrote:
> Many PCIe device drivers save the configuration state of their respective
> devices during probe and restore the same when their 'slot_reset' hook
> is called through PCIe Error Recovery System.
> If the system has a change in ASPM policy after the driver's probe is
> called and before error event occurred, 'slot_reset' hook restores the
> PCIe configuration state to what it was at the time of probe but not with
> what it was just before the occurrence of the error event.
> This effectively leads to a mismatch in the ASPM configuration between
> the device and its upstream parent device.
> This patch addresses that issue by updating the saved configuration state
> of the device with the latest info whenever there is a change w.r.t ASPM
> policy.

Please use blank lines between paragraphs.  Inferring "new paragraph"
from "last line was shorter than usual" is error-prone and hard to
read.

Omit "this patch" (that part is obvious) and use imperative mood:

  https://chris.beams.io/posts/git-commit/
  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=v6.0#n94
Bjorn Helgaas Jan. 28, 2023, 6:26 p.m. UTC | #6
On Wed, Jan 25, 2023 at 07:08:30PM +0530, Vidya Sagar wrote:
> Many PCIe device drivers save the configuration state of their respective
> devices during probe and restore the same when their 'slot_reset' hook
> is called through PCIe Error Recovery System.

This strategy of simply restoring config space after a reset is
common, but I think it's only a 90% solution.

After reset, the device is basically in a "fresh poweron" state [1].
At boot-time or for a hot-added device, we do a lot of setup when we
enumerate the device, and assuming that:

  - device reset, plus
  - current state in the struct pci_dev, plus
  - restoring config space

gets all the device and kernel state to the same place is a pretty big
assumption.

That said, we're pretty invested in this strategy for now, and I think
what you propose here is definitely an improvement.  Minor comments on
the implementation below.

Bjorn

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/PCI/pci-error-recovery.rst?id=v6.1#n277

> If the system has a change in ASPM policy after the driver's probe is
> called and before error event occurred, 'slot_reset' hook restores the
> PCIe configuration state to what it was at the time of probe but not with
> what it was just before the occurrence of the error event.
> This effectively leads to a mismatch in the ASPM configuration between
> the device and its upstream parent device.
> This patch addresses that issue by updating the saved configuration state
> of the device with the latest info whenever there is a change w.r.t ASPM
> policy.
>
> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> ---
>  drivers/pci/pci.h       |  4 ++++
>  drivers/pci/pcie/aspm.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 44 insertions(+)

> +++ b/drivers/pci/pci.h
> +void pci_save_aspm_state(struct pci_dev *dev);
> +void pci_restore_aspm_state(struct pci_dev *dev);

This patch only adds calls to these functions in aspm.c, so it doesn't
look like we need declarations here or stubs below.

> +static inline void pci_save_aspm_state(struct pci_dev *dev) { }
> +static inline void pci_restore_aspm_state(struct pci_dev *dev) { }

> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index 53a1fa306e1e..f25e0440d36b 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -151,6 +151,7 @@ static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable)
>  						   PCI_EXP_LNKCTL_CLKREQ_EN,
>  						   val);
>  	link->clkpm_enabled = !!enable;
> +	pci_save_aspm_state(child);
>  }
>  
>  static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
> @@ -757,6 +758,39 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
>  				PCI_L1SS_CTL1_L1SS_MASK, val);
>  }
>  
> +void pci_save_aspm_state(struct pci_dev *dev)

I might be missing something because these look like they should be
static.  But the declarations and these being non-static suggest that
you might have something more in mind that isn't part of this patch?

Move these save-state functions higher up if necessary to resolve the
forward reference from pcie_set_clkpm_nocheck().

> +{
> +	int i = 0;
> +	struct pci_cap_saved_state *save_state;
> +	u16 *cap;
> +
> +	if (!pci_is_pcie(dev))
> +		return;
> +
> +	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
> +	if (!save_state)
> +		return;
> +
> +	cap = (u16 *)&save_state->cap.data[0];
> +	i++;

"i" looks unnecessary, but I guess I see what you're doing --
mirroring the structure of pci_save_pcie_state() to make sure we put
LNKCTL in the correct element of cap[].

> +	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
> +}
> +
> +void pci_restore_aspm_state(struct pci_dev *dev)

No callers for this?  And I don't see why you would *need* callers;
this should be restored by pci_restore_pcie_state() already.  So this
looks like it could be removed completely.

> +{
> +	int i = 0;
> +	struct pci_cap_saved_state *save_state;
> +	u16 *cap;
> +
> +	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
> +	if (!save_state)
> +		return;
> +
> +	cap = (u16 *)&save_state->cap.data[0];
> +	i++;
> +	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
> +}
> +
>  void pci_save_aspm_l1ss_state(struct pci_dev *dev)
>  {
>  	struct pci_cap_saved_state *save_state;
> @@ -849,6 +883,12 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
>  		pcie_config_aspm_dev(parent, upstream);
>  
>  	link->aspm_enabled = state;
> +
> +	/* Update latest ASPM configuration in saved context */
> +	pci_save_aspm_state(link->downstream);
> +	pci_save_aspm_l1ss_state(link->downstream);
> +	pci_save_aspm_state(parent);
> +	pci_save_aspm_l1ss_state(parent);
>  }
>  
>  static void pcie_config_aspm_path(struct pcie_link_state *link)
> -- 
> 2.17.1
>
diff mbox series

Patch

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9ed3b5550043..f4a91d4fe96d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -566,12 +566,16 @@  bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
 void pcie_aspm_init_link_state(struct pci_dev *pdev);
 void pcie_aspm_exit_link_state(struct pci_dev *pdev);
 void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
+void pci_save_aspm_state(struct pci_dev *dev);
+void pci_restore_aspm_state(struct pci_dev *dev);
 void pci_save_aspm_l1ss_state(struct pci_dev *dev);
 void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
 static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
 static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
+static inline void pci_save_aspm_state(struct pci_dev *dev) { }
+static inline void pci_restore_aspm_state(struct pci_dev *dev) { }
 static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { }
 static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { }
 #endif
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 53a1fa306e1e..f25e0440d36b 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -151,6 +151,7 @@  static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable)
 						   PCI_EXP_LNKCTL_CLKREQ_EN,
 						   val);
 	link->clkpm_enabled = !!enable;
+	pci_save_aspm_state(child);
 }
 
 static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
@@ -757,6 +758,39 @@  static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
 				PCI_L1SS_CTL1_L1SS_MASK, val);
 }
 
+void pci_save_aspm_state(struct pci_dev *dev)
+{
+	int i = 0;
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+	if (!save_state)
+		return;
+
+	cap = (u16 *)&save_state->cap.data[0];
+	i++;
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &cap[i++]);
+}
+
+void pci_restore_aspm_state(struct pci_dev *dev)
+{
+	int i = 0;
+	struct pci_cap_saved_state *save_state;
+	u16 *cap;
+
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+	if (!save_state)
+		return;
+
+	cap = (u16 *)&save_state->cap.data[0];
+	i++;
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, cap[i++]);
+}
+
 void pci_save_aspm_l1ss_state(struct pci_dev *dev)
 {
 	struct pci_cap_saved_state *save_state;
@@ -849,6 +883,12 @@  static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
 		pcie_config_aspm_dev(parent, upstream);
 
 	link->aspm_enabled = state;
+
+	/* Update latest ASPM configuration in saved context */
+	pci_save_aspm_state(link->downstream);
+	pci_save_aspm_l1ss_state(link->downstream);
+	pci_save_aspm_state(parent);
+	pci_save_aspm_l1ss_state(parent);
 }
 
 static void pcie_config_aspm_path(struct pcie_link_state *link)