diff mbox

[v2] pci/aspm: Remove CONFIG_PCIEASPM_DEBUG

Message ID 20180510233912.96454-1-rajatja@google.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Rajat Jain May 10, 2018, 11:39 p.m. UTC
Currently, the linux kernel disables ASPM when a device is
removed from the kernel. But it is not enabled again when
a new device is added on that slot even if it was originally
enabled (by the BIOS) when the system booted up (assuming
POLICY_DEFAULT).

This was earlier discussed here:
https://www.spinics.net/lists/linux-pci/msg60212.html

And some suggestions from Bjorn here:
https://www.spinics.net/lists/linux-pci/msg60541.html

This patch picks up one of the suggestion, to remove the
CONFIG_PCIEASPM_DEBUG and thus make the code always
avilable. This provides control to userspace to control
ASPM on a per slot / device basis using sysfs interface.

Signed-off-by: Rajat Jain <rajatja@google.com>
---
v2: provide function definitions for !CONFIG_PCIEASPM case

 drivers/pci/pci.h        | 8 ++------
 drivers/pci/pcie/Kconfig | 8 --------
 drivers/pci/pcie/aspm.c  | 2 --
 3 files changed, 2 insertions(+), 16 deletions(-)

Comments

Rajat Jain June 5, 2018, 10:15 p.m. UTC | #1
Hello Bjorn,

Just checking to see if this one fell through the cracks? It isn't
solving any issues, but providing a tool for ASPM tweaking in user
space, which may be useful.

Thanks,

Rajat
On Thu, May 10, 2018 at 4:39 PM Rajat Jain <rajatja@google.com> wrote:
>
> Currently, the linux kernel disables ASPM when a device is
> removed from the kernel. But it is not enabled again when
> a new device is added on that slot even if it was originally
> enabled (by the BIOS) when the system booted up (assuming
> POLICY_DEFAULT).
>
> This was earlier discussed here:
> https://www.spinics.net/lists/linux-pci/msg60212.html
>
> And some suggestions from Bjorn here:
> https://www.spinics.net/lists/linux-pci/msg60541.html
>
> This patch picks up one of the suggestion, to remove the
> CONFIG_PCIEASPM_DEBUG and thus make the code always
> avilable. This provides control to userspace to control
> ASPM on a per slot / device basis using sysfs interface.
>
> Signed-off-by: Rajat Jain <rajatja@google.com>
> ---
> v2: provide function definitions for !CONFIG_PCIEASPM case
>
>  drivers/pci/pci.h        | 8 ++------
>  drivers/pci/pcie/Kconfig | 8 --------
>  drivers/pci/pcie/aspm.c  | 2 --
>  3 files changed, 2 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 023f7cf25bff..b953b2349ca1 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -358,17 +358,13 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev);
>  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>  void pcie_aspm_pm_state_change(struct pci_dev *pdev);
>  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> +void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
> +void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
>  #else
>  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> -#endif
> -
> -#ifdef CONFIG_PCIEASPM_DEBUG
> -void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
> -void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
> -#else
>  static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
>  #endif
> diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
> index b12e28b3d8f9..089b9f559d88 100644
> --- a/drivers/pci/pcie/Kconfig
> +++ b/drivers/pci/pcie/Kconfig
> @@ -46,14 +46,6 @@ config PCIEASPM
>
>           When in doubt, say Y.
>
> -config PCIEASPM_DEBUG
> -       bool "Debug PCI Express ASPM"
> -       depends on PCIEASPM
> -       default n
> -       help
> -         This enables PCI Express ASPM debug support. It will add per-device
> -         interface to control ASPM.
> -
>  choice
>         prompt "Default ASPM policy"
>         default PCIEASPM_DEFAULT
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index c687c817b47d..8ffc13d42baa 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -1161,7 +1161,6 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
>  module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
>         NULL, 0644);
>
> -#ifdef CONFIG_PCIEASPM_DEBUG
>  static ssize_t link_state_show(struct device *dev,
>                 struct device_attribute *attr,
>                 char *buf)
> @@ -1264,7 +1263,6 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
>                 sysfs_remove_file_from_group(&pdev->dev.kobj,
>                         &dev_attr_clk_ctl.attr, power_group);
>  }
> -#endif
>
>  static int __init pcie_aspm_disable(char *str)
>  {
> --
> 2.17.0.441.gb46fe60e1d-goog
>
Sinan Kaya June 9, 2018, 11:49 p.m. UTC | #2
> On Thu, May 10, 2018 at 4:39 PM Rajat Jain <rajatja@google.com> wrote:
>>
>> Currently, the linux kernel disables ASPM when a device is
>> removed from the kernel. But it is not enabled again when
>> a new device is added on that slot even if it was originally
>> enabled (by the BIOS) when the system booted up (assuming
>> POLICY_DEFAULT).
>>
>> This was earlier discussed here:
>> https://www.spinics.net/lists/linux-pci/msg60212.html
>>
>> And some suggestions from Bjorn here:
>> https://www.spinics.net/lists/linux-pci/msg60541.html
>>
>> This patch picks up one of the suggestion, to remove the
>> CONFIG_PCIEASPM_DEBUG and thus make the code always
>> avilable. This provides control to userspace to control
>> ASPM on a per slot / device basis using sysfs interface.
>>
>> Signed-off-by: Rajat Jain <rajatja@google.com>
>> ---
>> v2: provide function definitions for !CONFIG_PCIEASPM case

Reviewed-by: Sinan Kaya <okaya@codeaurora.org>
Bjorn Helgaas June 29, 2018, 11:27 p.m. UTC | #3
On Thu, May 10, 2018 at 04:39:12PM -0700, Rajat Jain wrote:
> Currently, the linux kernel disables ASPM when a device is
> removed from the kernel. But it is not enabled again when
> a new device is added on that slot even if it was originally
> enabled (by the BIOS) when the system booted up (assuming
> POLICY_DEFAULT).
> 
> This was earlier discussed here:
> https://www.spinics.net/lists/linux-pci/msg60212.html
> 
> And some suggestions from Bjorn here:
> https://www.spinics.net/lists/linux-pci/msg60541.html
> 
> This patch picks up one of the suggestion, to remove the
> CONFIG_PCIEASPM_DEBUG and thus make the code always
> avilable. This provides control to userspace to control
> ASPM on a per slot / device basis using sysfs interface.
> 
> Signed-off-by: Rajat Jain <rajatja@google.com>

Applied with Sinan's reviewed-by to pci/aspm for v4.19, thanks!

> ---
> v2: provide function definitions for !CONFIG_PCIEASPM case
> 
>  drivers/pci/pci.h        | 8 ++------
>  drivers/pci/pcie/Kconfig | 8 --------
>  drivers/pci/pcie/aspm.c  | 2 --
>  3 files changed, 2 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 023f7cf25bff..b953b2349ca1 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -358,17 +358,13 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev);
>  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
>  void pcie_aspm_pm_state_change(struct pci_dev *pdev);
>  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> +void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
> +void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
>  #else
>  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> -#endif
> -
> -#ifdef CONFIG_PCIEASPM_DEBUG
> -void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
> -void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
> -#else
>  static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
>  #endif
> diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
> index b12e28b3d8f9..089b9f559d88 100644
> --- a/drivers/pci/pcie/Kconfig
> +++ b/drivers/pci/pcie/Kconfig
> @@ -46,14 +46,6 @@ config PCIEASPM
>  
>  	  When in doubt, say Y.
>  
> -config PCIEASPM_DEBUG
> -	bool "Debug PCI Express ASPM"
> -	depends on PCIEASPM
> -	default n
> -	help
> -	  This enables PCI Express ASPM debug support. It will add per-device
> -	  interface to control ASPM.
> -
>  choice
>  	prompt "Default ASPM policy"
>  	default PCIEASPM_DEFAULT
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index c687c817b47d..8ffc13d42baa 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -1161,7 +1161,6 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
>  module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
>  	NULL, 0644);
>  
> -#ifdef CONFIG_PCIEASPM_DEBUG
>  static ssize_t link_state_show(struct device *dev,
>  		struct device_attribute *attr,
>  		char *buf)
> @@ -1264,7 +1263,6 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
>  		sysfs_remove_file_from_group(&pdev->dev.kobj,
>  			&dev_attr_clk_ctl.attr, power_group);
>  }
> -#endif
>  
>  static int __init pcie_aspm_disable(char *str)
>  {
> -- 
> 2.17.0.441.gb46fe60e1d-goog
>
Bjorn Helgaas July 27, 2018, 8:26 p.m. UTC | #4
[+cc Rafael, Richard, Carlos, Pali, Takashi, Andy, Colin for question
about how to expose ASPM power management in sysfs]

On Thu, May 10, 2018 at 04:39:12PM -0700, Rajat Jain wrote:
> ...
> And some suggestions from Bjorn here:
> https://www.spinics.net/lists/linux-pci/msg60541.html
> 
> This patch picks up one of the suggestion, to remove the
> CONFIG_PCIEASPM_DEBUG and thus make the code always
> avilable. This provides control to userspace to control
> ASPM on a per slot / device basis using sysfs interface.

TL;DR: When CONFIG_PCIEASPM_DEBUG=y, Linux makes ASPM control files
       visible in sysfs, associated with the upstream end (e.g., Root
       Port) of a link.  Should these files be associated with the
       downstream end (e.g., NIC, GPU, etc) instead?

I think we do need to make ASPM control files visible in sysfs, as
this patch does.  But I have a question about exactly *how* we want to
do this.  I had planned to merge this patch for v4.19, but I think
I'll postpone it until we figure this out.

ASPM is a power management feature of a PCIe link, and it affects the
devices on both ends of that link.  The upstream end (closest to the
CPU) is typically a Root Port, and the downstream end is typically an
Endpoint (e.g., a GPU, NIC, or NVMe device).  For example, my laptop
has these devices:

  00:1c.2 Intel Root Port to [bus 04]
  04:00.0 Intel 8260 Wireless NIC

There's a PCIe link between these two devices, and if both ends
support it, ASPM reduces power usage when the NIC is idle.  The
hardware reduces power usage automatically; the kernel only needs to
configure ASPM.

ASPM affects performance as well as power, so we have knobs to control
the tradeoff.  Starting with the big hammers, we currently have:

  - Kernel build-time setting (CONFIG_PCIEASPM_PERFORMANCE, etc).
    Requires kernel rebuild and reboot.

  - "pcie_aspm=X" kernel boot parameter.  Can only enable/disable and
    requires a reboot.

  - /sys/module/pcie_aspm/parameters/policy file.  At any time, root
    can set the system-wide ASPM policy to one of these settings:

      + highest performance, highest power consumption
      + moderate power saving at cost of some performance
      + aggressive power saving at cost of more performance
      + use whatever setting BIOS configured

  - Many /sys/devices/pci0000:00/0000:00:1c.2/power/link_state files.
    At any time, root can set the ASPM policy to one of the above
    settings, but for individual devices.  Currently these files are
    only available when CONFIG_PCIEASPM_DEBUG=y (Red Hat does not
    enable this, but Ubuntu does).

The patch below changes it so these /sys/devices/.../link_state files
*always* exist, regardless of CONFIG_PCIEASPM_DEBUG.

The question is where those sysfs files should be.  Currently they are
associated with the device at the *upstream* end of the link.  In the
example above, they're associated with the Root Port:

  /sys/devices/pci0000:00/0000:00:1c.2/power/link_state

I don't know if that's the right place, or if they should be
associated with the device at the *downstream* end of the link, i.e.,
04:00.0.  The downstream end might be better because:

  - It's easier to associate the downstream end with a device the user
    cares about, e.g., a NIC, GPU, etc.  This is mostly a user-
    interface issue.

  - A link can lead to a multi-function device, and the spec allows
    those functions to have different ASPM settings (see PCIe r4.0,
    sec 5.4.1).  With the sysfs files at the upstream end of the link,
    we have no way to configure those functions individually.

Any thoughts?

> ...
> diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
> index b12e28b3d8f9..089b9f559d88 100644
> --- a/drivers/pci/pcie/Kconfig
> +++ b/drivers/pci/pcie/Kconfig
> @@ -46,14 +46,6 @@ config PCIEASPM
>  
>  	  When in doubt, say Y.
>  
> -config PCIEASPM_DEBUG
> -	bool "Debug PCI Express ASPM"
> -	depends on PCIEASPM
> -	default n
> -	help
> -	  This enables PCI Express ASPM debug support. It will add per-device
> -	  interface to control ASPM.
> -
>  choice
>  	prompt "Default ASPM policy"
>  	default PCIEASPM_DEFAULT
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index c687c817b47d..8ffc13d42baa 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -1161,7 +1161,6 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
>  module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
>  	NULL, 0644);
>  
> -#ifdef CONFIG_PCIEASPM_DEBUG
>  static ssize_t link_state_show(struct device *dev,
>  		struct device_attribute *attr,
>  		char *buf)
> @@ -1264,7 +1263,6 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
>  		sysfs_remove_file_from_group(&pdev->dev.kobj,
>  			&dev_attr_clk_ctl.attr, power_group);
>  }
> -#endif
>  
>  static int __init pcie_aspm_disable(char *str)
>  {
> -- 
> 2.17.0.441.gb46fe60e1d-goog
>
Takashi Iwai July 27, 2018, 9:03 p.m. UTC | #5
On Fri, 27 Jul 2018 22:26:19 +0200,
Bjorn Helgaas wrote:
> 
> [+cc Rafael, Richard, Carlos, Pali, Takashi, Andy, Colin for question
> about how to expose ASPM power management in sysfs]
> 
> On Thu, May 10, 2018 at 04:39:12PM -0700, Rajat Jain wrote:
> > ...
> > And some suggestions from Bjorn here:
> > https://www.spinics.net/lists/linux-pci/msg60541.html
> > 
> > This patch picks up one of the suggestion, to remove the
> > CONFIG_PCIEASPM_DEBUG and thus make the code always
> > avilable. This provides control to userspace to control
> > ASPM on a per slot / device basis using sysfs interface.
> 
> TL;DR: When CONFIG_PCIEASPM_DEBUG=y, Linux makes ASPM control files
>        visible in sysfs, associated with the upstream end (e.g., Root
>        Port) of a link.  Should these files be associated with the
>        downstream end (e.g., NIC, GPU, etc) instead?
> 
> I think we do need to make ASPM control files visible in sysfs, as
> this patch does.  But I have a question about exactly *how* we want to
> do this.  I had planned to merge this patch for v4.19, but I think
> I'll postpone it until we figure this out.
> 
> ASPM is a power management feature of a PCIe link, and it affects the
> devices on both ends of that link.  The upstream end (closest to the
> CPU) is typically a Root Port, and the downstream end is typically an
> Endpoint (e.g., a GPU, NIC, or NVMe device).  For example, my laptop
> has these devices:
> 
>   00:1c.2 Intel Root Port to [bus 04]
>   04:00.0 Intel 8260 Wireless NIC
> 
> There's a PCIe link between these two devices, and if both ends
> support it, ASPM reduces power usage when the NIC is idle.  The
> hardware reduces power usage automatically; the kernel only needs to
> configure ASPM.
> 
> ASPM affects performance as well as power, so we have knobs to control
> the tradeoff.  Starting with the big hammers, we currently have:
> 
>   - Kernel build-time setting (CONFIG_PCIEASPM_PERFORMANCE, etc).
>     Requires kernel rebuild and reboot.
> 
>   - "pcie_aspm=X" kernel boot parameter.  Can only enable/disable and
>     requires a reboot.
> 
>   - /sys/module/pcie_aspm/parameters/policy file.  At any time, root
>     can set the system-wide ASPM policy to one of these settings:
> 
>       + highest performance, highest power consumption
>       + moderate power saving at cost of some performance
>       + aggressive power saving at cost of more performance
>       + use whatever setting BIOS configured
> 
>   - Many /sys/devices/pci0000:00/0000:00:1c.2/power/link_state files.
>     At any time, root can set the ASPM policy to one of the above
>     settings, but for individual devices.  Currently these files are
>     only available when CONFIG_PCIEASPM_DEBUG=y (Red Hat does not
>     enable this, but Ubuntu does).
> 
> The patch below changes it so these /sys/devices/.../link_state files
> *always* exist, regardless of CONFIG_PCIEASPM_DEBUG.
> 
> The question is where those sysfs files should be.  Currently they are
> associated with the device at the *upstream* end of the link.  In the
> example above, they're associated with the Root Port:
> 
>   /sys/devices/pci0000:00/0000:00:1c.2/power/link_state
> 
> I don't know if that's the right place, or if they should be
> associated with the device at the *downstream* end of the link, i.e.,
> 04:00.0.  The downstream end might be better because:
> 
>   - It's easier to associate the downstream end with a device the user
>     cares about, e.g., a NIC, GPU, etc.  This is mostly a user-
>     interface issue.
> 
>   - A link can lead to a multi-function device, and the spec allows
>     those functions to have different ASPM settings (see PCIe r4.0,
>     sec 5.4.1).  With the sysfs files at the upstream end of the link,
>     we have no way to configure those functions individually.
> 
> Any thoughts?

Through your description, having a control in the downstream side
sounds convincing enough.  The only drawback I can think of is the
complication of setups; you'd need to adjust each downstream node.

Maybe one option would be to add a new policy "let downstream decide",
and the downstream sysfs has effect only when the upstream sets this
mode.  When the upstream sets to another (currently existing) mode, it
forces the mode to all downstream devices.


Just my quick $0.02.


thanks,

Takashi

> 
> > ...
> > diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
> > index b12e28b3d8f9..089b9f559d88 100644
> > --- a/drivers/pci/pcie/Kconfig
> > +++ b/drivers/pci/pcie/Kconfig
> > @@ -46,14 +46,6 @@ config PCIEASPM
> >  
> >  	  When in doubt, say Y.
> >  
> > -config PCIEASPM_DEBUG
> > -	bool "Debug PCI Express ASPM"
> > -	depends on PCIEASPM
> > -	default n
> > -	help
> > -	  This enables PCI Express ASPM debug support. It will add per-device
> > -	  interface to control ASPM.
> > -
> >  choice
> >  	prompt "Default ASPM policy"
> >  	default PCIEASPM_DEFAULT
> > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> > index c687c817b47d..8ffc13d42baa 100644
> > --- a/drivers/pci/pcie/aspm.c
> > +++ b/drivers/pci/pcie/aspm.c
> > @@ -1161,7 +1161,6 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
> >  module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
> >  	NULL, 0644);
> >  
> > -#ifdef CONFIG_PCIEASPM_DEBUG
> >  static ssize_t link_state_show(struct device *dev,
> >  		struct device_attribute *attr,
> >  		char *buf)
> > @@ -1264,7 +1263,6 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
> >  		sysfs_remove_file_from_group(&pdev->dev.kobj,
> >  			&dev_attr_clk_ctl.attr, power_group);
> >  }
> > -#endif
> >  
> >  static int __init pcie_aspm_disable(char *str)
> >  {
> > -- 
> > 2.17.0.441.gb46fe60e1d-goog
> > 
>
Sinan Kaya July 29, 2018, 12:16 a.m. UTC | #6
On 7/27/2018 1:26 PM, Bjorn Helgaas wrote:
> - A link can lead to a multi-function device, and the spec allows
>      those functions to have different ASPM settings (see PCIe r4.0,
>      sec 5.4.1).  With the sysfs files at the upstream end of the link,
>      we have no way to configure those functions individually.

Even though we can set them individually, there is only one PCIe link
for single function as well as multi-function devices.

It would be a user mistake to allow individual PCIe functions with
different ASPM policies. (maybe, we should enforce it if we are not
doing it already).
Lukas Wunner July 30, 2018, 8:32 a.m. UTC | #7
On Fri, Jul 27, 2018 at 03:26:19PM -0500, Bjorn Helgaas wrote:
> The question is where those sysfs files should be.  Currently they are
> associated with the device at the *upstream* end of the link.  In the
> example above, they're associated with the Root Port:
> 
>   /sys/devices/pci0000:00/0000:00:1c.2/power/link_state
> 
> I don't know if that's the right place, or if they should be
> associated with the device at the *downstream* end of the link, i.e.,
> 04:00.0.  The downstream end might be better because:
> 
>   - It's easier to associate the downstream end with a device the user
>     cares about, e.g., a NIC, GPU, etc.  This is mostly a user-
>     interface issue.
> 
>   - A link can lead to a multi-function device, and the spec allows
>     those functions to have different ASPM settings (see PCIe r4.0,
>     sec 5.4.1).  With the sysfs files at the upstream end of the link,
>     we have no way to configure those functions individually.
> 
> Any thoughts?

Conceivably, could the downstream end not be enumerated at all?
(E.g. a hotplug or rescan is necessary for it to be enumerated?)

If so, the upstream end might be better because the ASPM settings
can be configured in advance, before the downstream end appears.

Thanks,

Lukas
Bjorn Helgaas July 30, 2018, 2:14 p.m. UTC | #8
On Sat, Jul 28, 2018 at 05:16:13PM -0700, Sinan Kaya wrote:
> On 7/27/2018 1:26 PM, Bjorn Helgaas wrote:
> > - A link can lead to a multi-function device, and the spec allows
> >      those functions to have different ASPM settings (see PCIe r4.0,
> >      sec 5.4.1).  With the sysfs files at the upstream end of the link,
> >      we have no way to configure those functions individually.
> 
> Even though we can set them individually, there is only one PCIe link
> for single function as well as multi-function devices.
> 
> It would be a user mistake to allow individual PCIe functions with
> different ASPM policies. (maybe, we should enforce it if we are not
> doing it already).

It's true that multi-function devices share a single PCIe link.

However, the end of sec 5.4.1 does make it clear that the functions
need not have the same ASPM configuration, and it gives rules for how
those different settings should affect the shared link.  Since it
mentions different ASPM Control fields for the different functions, I
assume the policy combining those per-function settings into the
single link behavior must be implemented in the hardware.

Obviously we don't support per-function ASPM settings today.  I don't
know whether there's any value in supporting them or not, but putting
the control files at the upstream end basically precludes us from ever
supporting them.

Bjorn
Sinan Kaya July 30, 2018, 4:08 p.m. UTC | #9
On 7/30/2018 7:14 AM, Bjorn Helgaas wrote:
> However, the end of sec 5.4.1 does make it clear that the functions
> need not have the same ASPM configuration, and it gives rules for how
> those different settings should affect the shared link.  Since it
> mentions different ASPM Control fields for the different functions, I
> assume the policy combining those per-function settings into the
> single link behavior must be implemented in the hardware.

Very interesting. This is news for me.
Rajat Jain July 30, 2018, 4:18 p.m. UTC | #10
On Fri, Jul 27, 2018 at 1:26 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> [+cc Rafael, Richard, Carlos, Pali, Takashi, Andy, Colin for question
> about how to expose ASPM power management in sysfs]
>
> On Thu, May 10, 2018 at 04:39:12PM -0700, Rajat Jain wrote:
>> ...
>> And some suggestions from Bjorn here:
>> https://www.spinics.net/lists/linux-pci/msg60541.html
>>
>> This patch picks up one of the suggestion, to remove the
>> CONFIG_PCIEASPM_DEBUG and thus make the code always
>> avilable. This provides control to userspace to control
>> ASPM on a per slot / device basis using sysfs interface.
>
> TL;DR: When CONFIG_PCIEASPM_DEBUG=y, Linux makes ASPM control files
>        visible in sysfs, associated with the upstream end (e.g., Root
>        Port) of a link.  Should these files be associated with the
>        downstream end (e.g., NIC, GPU, etc) instead?
>
> I think we do need to make ASPM control files visible in sysfs, as
> this patch does.  But I have a question about exactly *how* we want to
> do this.  I had planned to merge this patch for v4.19, but I think
> I'll postpone it until we figure this out.


Hi Bjorn,

Just a side note FYI, it is OK if you want to drop this, because we
anyway have this today as a config option so anyone who wants to use
these files can enable that config anyway.

Thanks,

Rajat

>
> ASPM is a power management feature of a PCIe link, and it affects the
> devices on both ends of that link.  The upstream end (closest to the
> CPU) is typically a Root Port, and the downstream end is typically an
> Endpoint (e.g., a GPU, NIC, or NVMe device).  For example, my laptop
> has these devices:
>
>   00:1c.2 Intel Root Port to [bus 04]
>   04:00.0 Intel 8260 Wireless NIC
>
> There's a PCIe link between these two devices, and if both ends
> support it, ASPM reduces power usage when the NIC is idle.  The
> hardware reduces power usage automatically; the kernel only needs to
> configure ASPM.
>
> ASPM affects performance as well as power, so we have knobs to control
> the tradeoff.  Starting with the big hammers, we currently have:
>
>   - Kernel build-time setting (CONFIG_PCIEASPM_PERFORMANCE, etc).
>     Requires kernel rebuild and reboot.
>
>   - "pcie_aspm=X" kernel boot parameter.  Can only enable/disable and
>     requires a reboot.
>
>   - /sys/module/pcie_aspm/parameters/policy file.  At any time, root
>     can set the system-wide ASPM policy to one of these settings:
>
>       + highest performance, highest power consumption
>       + moderate power saving at cost of some performance
>       + aggressive power saving at cost of more performance
>       + use whatever setting BIOS configured
>
>   - Many /sys/devices/pci0000:00/0000:00:1c.2/power/link_state files.
>     At any time, root can set the ASPM policy to one of the above
>     settings, but for individual devices.  Currently these files are
>     only available when CONFIG_PCIEASPM_DEBUG=y (Red Hat does not
>     enable this, but Ubuntu does).
>
> The patch below changes it so these /sys/devices/.../link_state files
> *always* exist, regardless of CONFIG_PCIEASPM_DEBUG.
>
> The question is where those sysfs files should be.  Currently they are
> associated with the device at the *upstream* end of the link.  In the
> example above, they're associated with the Root Port:
>
>   /sys/devices/pci0000:00/0000:00:1c.2/power/link_state
>
> I don't know if that's the right place, or if they should be
> associated with the device at the *downstream* end of the link, i.e.,
> 04:00.0.  The downstream end might be better because:
>
>   - It's easier to associate the downstream end with a device the user
>     cares about, e.g., a NIC, GPU, etc.  This is mostly a user-
>     interface issue.
>
>   - A link can lead to a multi-function device, and the spec allows
>     those functions to have different ASPM settings (see PCIe r4.0,
>     sec 5.4.1).  With the sysfs files at the upstream end of the link,
>     we have no way to configure those functions individually.
>
> Any thoughts?
>
>> ...
>> diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
>> index b12e28b3d8f9..089b9f559d88 100644
>> --- a/drivers/pci/pcie/Kconfig
>> +++ b/drivers/pci/pcie/Kconfig
>> @@ -46,14 +46,6 @@ config PCIEASPM
>>
>>         When in doubt, say Y.
>>
>> -config PCIEASPM_DEBUG
>> -     bool "Debug PCI Express ASPM"
>> -     depends on PCIEASPM
>> -     default n
>> -     help
>> -       This enables PCI Express ASPM debug support. It will add per-device
>> -       interface to control ASPM.
>> -
>>  choice
>>       prompt "Default ASPM policy"
>>       default PCIEASPM_DEFAULT
>> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
>> index c687c817b47d..8ffc13d42baa 100644
>> --- a/drivers/pci/pcie/aspm.c
>> +++ b/drivers/pci/pcie/aspm.c
>> @@ -1161,7 +1161,6 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
>>  module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
>>       NULL, 0644);
>>
>> -#ifdef CONFIG_PCIEASPM_DEBUG
>>  static ssize_t link_state_show(struct device *dev,
>>               struct device_attribute *attr,
>>               char *buf)
>> @@ -1264,7 +1263,6 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
>>               sysfs_remove_file_from_group(&pdev->dev.kobj,
>>                       &dev_attr_clk_ctl.attr, power_group);
>>  }
>> -#endif
>>
>>  static int __init pcie_aspm_disable(char *str)
>>  {
>> --
>> 2.17.0.441.gb46fe60e1d-goog
>>
Bjorn Helgaas July 30, 2018, 5:26 p.m. UTC | #11
On Mon, Jul 30, 2018 at 10:32:10AM +0200, Lukas Wunner wrote:
> On Fri, Jul 27, 2018 at 03:26:19PM -0500, Bjorn Helgaas wrote:
> > The question is where those sysfs files should be.  Currently they are
> > associated with the device at the *upstream* end of the link.  In the
> > example above, they're associated with the Root Port:
> > 
> >   /sys/devices/pci0000:00/0000:00:1c.2/power/link_state
> > 
> > I don't know if that's the right place, or if they should be
> > associated with the device at the *downstream* end of the link, i.e.,
> > 04:00.0.  The downstream end might be better because:
> > 
> >   - It's easier to associate the downstream end with a device the user
> >     cares about, e.g., a NIC, GPU, etc.  This is mostly a user-
> >     interface issue.
> > 
> >   - A link can lead to a multi-function device, and the spec allows
> >     those functions to have different ASPM settings (see PCIe r4.0,
> >     sec 5.4.1).  With the sysfs files at the upstream end of the link,
> >     we have no way to configure those functions individually.
> > 
> > Any thoughts?
> 
> Conceivably, could the downstream end not be enumerated at all?
> (E.g. a hotplug or rescan is necessary for it to be enumerated?)
> 
> If so, the upstream end might be better because the ASPM settings
> can be configured in advance, before the downstream end appears.

Yes, of course there may be ports (root ports or switch downstream
ports) that have nothing connected yet.  The ASPM Control on a
hot-added device would normally be 0 (ASPM disabled).  Usually we can
enable features on the upstream end before the downstream end, but
this sentence in sec 5.4.1.3 is concerning:

  Software must not enable L0s in either direction on a given Link
  unless components on both sides of the Link each support L0s;
  otherwise, the result is undefined.

If we can trust that statement, if we enable ASPM on the upstream end,
then hot-add a device that doesn't support L0s, the result may be
undefined.  So I'm a little wary of configuring ASPM before a
downstream device is present.

I think the sysfs control files are absent if there's no downstream
device, but we create them when the hot-add happens, in paths like
this:

  pciehp_configure_device
    pci_scan_slot(bus)
      pcie_aspm_init_link_state(bus->self)

It seems ugly to me that this ASPM init is attached to the *parent*
instead of being done somewhere like pci_configure_device().
Pali Rohár July 31, 2018, 8:13 a.m. UTC | #12
On Friday 27 July 2018 15:26:19 Bjorn Helgaas wrote:
> [+cc Rafael, Richard, Carlos, Pali, Takashi, Andy, Colin for question
> about how to expose ASPM power management in sysfs]
> 
> On Thu, May 10, 2018 at 04:39:12PM -0700, Rajat Jain wrote:
> > ...
> > And some suggestions from Bjorn here:
> > https://www.spinics.net/lists/linux-pci/msg60541.html
> > 
> > This patch picks up one of the suggestion, to remove the
> > CONFIG_PCIEASPM_DEBUG and thus make the code always
> > avilable. This provides control to userspace to control
> > ASPM on a per slot / device basis using sysfs interface.
> 
> TL;DR: When CONFIG_PCIEASPM_DEBUG=y, Linux makes ASPM control files
>        visible in sysfs, associated with the upstream end (e.g., Root
>        Port) of a link.  Should these files be associated with the
>        downstream end (e.g., NIC, GPU, etc) instead?
> 
> I think we do need to make ASPM control files visible in sysfs, as
> this patch does.  But I have a question about exactly *how* we want to
> do this.  I had planned to merge this patch for v4.19, but I think
> I'll postpone it until we figure this out.
> 
> ASPM is a power management feature of a PCIe link, and it affects the
> devices on both ends of that link.  The upstream end (closest to the
> CPU) is typically a Root Port, and the downstream end is typically an
> Endpoint (e.g., a GPU, NIC, or NVMe device).  For example, my laptop
> has these devices:
> 
>   00:1c.2 Intel Root Port to [bus 04]
>   04:00.0 Intel 8260 Wireless NIC
> 
> There's a PCIe link between these two devices, and if both ends
> support it, ASPM reduces power usage when the NIC is idle.  The
> hardware reduces power usage automatically; the kernel only needs to
> configure ASPM.
> 
> ASPM affects performance as well as power, so we have knobs to control
> the tradeoff.  Starting with the big hammers, we currently have:
> 
>   - Kernel build-time setting (CONFIG_PCIEASPM_PERFORMANCE, etc).
>     Requires kernel rebuild and reboot.
> 
>   - "pcie_aspm=X" kernel boot parameter.  Can only enable/disable and
>     requires a reboot.
> 
>   - /sys/module/pcie_aspm/parameters/policy file.  At any time, root
>     can set the system-wide ASPM policy to one of these settings:
> 
>       + highest performance, highest power consumption
>       + moderate power saving at cost of some performance
>       + aggressive power saving at cost of more performance
>       + use whatever setting BIOS configured
> 
>   - Many /sys/devices/pci0000:00/0000:00:1c.2/power/link_state files.
>     At any time, root can set the ASPM policy to one of the above
>     settings, but for individual devices.  Currently these files are
>     only available when CONFIG_PCIEASPM_DEBUG=y (Red Hat does not
>     enable this, but Ubuntu does).
> 
> The patch below changes it so these /sys/devices/.../link_state files
> *always* exist, regardless of CONFIG_PCIEASPM_DEBUG.
> 
> The question is where those sysfs files should be.  Currently they are
> associated with the device at the *upstream* end of the link.  In the
> example above, they're associated with the Root Port:
> 
>   /sys/devices/pci0000:00/0000:00:1c.2/power/link_state
> 
> I don't know if that's the right place, or if they should be
> associated with the device at the *downstream* end of the link, i.e.,
> 04:00.0.  The downstream end might be better because:
> 
>   - It's easier to associate the downstream end with a device the user
>     cares about, e.g., a NIC, GPU, etc.  This is mostly a user-
>     interface issue.
> 
>   - A link can lead to a multi-function device, and the spec allows
>     those functions to have different ASPM settings (see PCIe r4.0,
>     sec 5.4.1).  With the sysfs files at the upstream end of the link,
>     we have no way to configure those functions individually.
> 
> Any thoughts?

Hi Bjorn, in my opinion sysfs entries should at the downstream end. As I
think primary usage is to put "end" downstream device (e.g. wifi card)
into lower power mode to increase battery runtime on laptop.

Anyway, if sysfs entry is going to be moved from one place to another
maybe it would be better to give it a new name. Because currently it
sysfs entry is bound to the upstream device and if you change location,
then sysfs entry would change it logic.
diff mbox

Patch

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 023f7cf25bff..b953b2349ca1 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -358,17 +358,13 @@  void pcie_aspm_init_link_state(struct pci_dev *pdev);
 void pcie_aspm_exit_link_state(struct pci_dev *pdev);
 void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
+void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
+void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
 static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
 static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
 static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
-#endif
-
-#ifdef CONFIG_PCIEASPM_DEBUG
-void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev);
-void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev);
-#else
 static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
 static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
 #endif
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index b12e28b3d8f9..089b9f559d88 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -46,14 +46,6 @@  config PCIEASPM
 
 	  When in doubt, say Y.
 
-config PCIEASPM_DEBUG
-	bool "Debug PCI Express ASPM"
-	depends on PCIEASPM
-	default n
-	help
-	  This enables PCI Express ASPM debug support. It will add per-device
-	  interface to control ASPM.
-
 choice
 	prompt "Default ASPM policy"
 	default PCIEASPM_DEFAULT
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index c687c817b47d..8ffc13d42baa 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1161,7 +1161,6 @@  static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
 module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
 	NULL, 0644);
 
-#ifdef CONFIG_PCIEASPM_DEBUG
 static ssize_t link_state_show(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -1264,7 +1263,6 @@  void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 		sysfs_remove_file_from_group(&pdev->dev.kobj,
 			&dev_attr_clk_ctl.attr, power_group);
 }
-#endif
 
 static int __init pcie_aspm_disable(char *str)
 {