diff mbox series

r8169: introduce polling method for link change

Message ID 20210603025414.226526-1-koba.ko@canonical.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series r8169: introduce polling method for link change | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 1 maintainers not CCed: nic_swsd@realtek.com
netdev/source_inline fail Was 0 now: 2
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 0 this patch: 2
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/build_allmodconfig_warn fail Errors and warnings before: 0 this patch: 2
netdev/header_inline success Link

Commit Message

Koba Ko June 3, 2021, 2:54 a.m. UTC
For RTL8106E, it's a Fast-ethernet chip.
If ASPM is enabled, the link chang interrupt wouldn't be triggered
immediately and must wait a very long time to get link change interrupt.
Even the link change interrupt isn't triggered, the phy link is already
established.

Introduce a polling method to watch the status of phy link and disable
the link change interrupt.
Also add a quirk for those realtek devices have the same issue.

Signed-off-by: Koba Ko <koba.ko@canonical.com>
---
 drivers/net/ethernet/realtek/r8169.h      |   2 +
 drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
 2 files changed, 98 insertions(+), 16 deletions(-)

Comments

Heiner Kallweit June 3, 2021, 9:59 a.m. UTC | #1
On 03.06.2021 04:54, Koba Ko wrote:
> For RTL8106E, it's a Fast-ethernet chip.
> If ASPM is enabled, the link chang interrupt wouldn't be triggered
> immediately and must wait a very long time to get link change interrupt.
> Even the link change interrupt isn't triggered, the phy link is already
> established.
> 
At first please provide a full dmesg log and output of lspci -vv.
Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
output.

Does the issue affect link-down and/or link-up detection?
Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
D3hot and link-up detection triggers a PME.

> Introduce a polling method to watch the status of phy link and disable
> the link change interrupt.
> Also add a quirk for those realtek devices have the same issue.
> 
Which are the affected chip versions? Did you check with Realtek?
Your patch switches to polling for all Fast Ethernet versions,
and that's not what we want.

My suspicion would be that something is system-dependent. Else I think
we would have seen such a report before.

> Signed-off-by: Koba Ko <koba.ko@canonical.com>
> ---
>  drivers/net/ethernet/realtek/r8169.h      |   2 +
>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
>  2 files changed, 98 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
> index 2728df46ec41..a8c71adb1b57 100644
> --- a/drivers/net/ethernet/realtek/r8169.h
> +++ b/drivers/net/ethernet/realtek/r8169.h
> @@ -11,6 +11,8 @@
>  #include <linux/types.h>
>  #include <linux/phy.h>
>  
> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
> +
>  enum mac_version {
>  	/* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
>  	RTL_GIGA_MAC_VER_02,
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 2c89cde7da1e..70aacc83d641 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
>  
>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
>  
> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
> +	{ PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
> +	{ 0 }
> +};
> +

This doesn't seem to be used.

>  enum rtl_registers {
>  	MAC0		= 0,	/* Ethernet hardware address. */
>  	MAC4		= 4,
> @@ -618,6 +623,7 @@ struct rtl8169_private {
>  	u16 cp_cmd;
>  	u32 irq_mask;
>  	struct clk *clk;
> +	struct timer_list link_timer;
>  
>  	struct {
>  		DECLARE_BITMAP(flags, RTL_FLAG_MAX);
> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
>  	RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
>  }
>  
> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
> +{
> +	struct pci_dev *pdev = tp->pci_dev;
> +
> +	if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
> +		return 1;
> +
> +	return 0;
> +}
> +
>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
>  {
>  	r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
>  	rtnl_unlock();
>  }
>  
> +static void r8169_phylink_handler(struct net_device *ndev)
> +{
> +	struct rtl8169_private *tp = netdev_priv(ndev);
> +
> +	if (netif_carrier_ok(ndev)) {
> +		rtl_link_chg_patch(tp);
> +		pm_request_resume(&tp->pci_dev->dev);
> +	} else {
> +		pm_runtime_idle(&tp->pci_dev->dev);
> +	}
> +
> +	if (net_ratelimit())
> +		phy_print_status(tp->phydev);
> +}
> +
> +static unsigned int
> +rtl8169_xmii_link_ok(struct net_device *dev)
> +{
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	unsigned int retval;
> +
> +	retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
> +
> +	return retval;
> +}
> +
> +static void
> +rtl8169_check_link_status(struct net_device *dev)
> +{
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +	int link_status_on;
> +
> +	link_status_on = rtl8169_xmii_link_ok(dev);
> +
> +	if (netif_carrier_ok(dev) == link_status_on)
> +		return;
> +
> +	phy_mac_interrupt(tp->phydev);
> +
> +	r8169_phylink_handler (dev);
> +}
> +
> +static void rtl8169_link_timer(struct timer_list *t)
> +{
> +	struct rtl8169_private *tp = from_timer(tp, t, link_timer);
> +	struct net_device *dev = tp->dev;
> +	struct timer_list *timer = t;
> +	unsigned long flags;

flags isn't used and triggers a compiler warning. Did you even
compile-test your patch?

> +
> +	rtl8169_check_link_status(dev);
> +
> +	if (timer_pending(&tp->link_timer))
> +		return;
> +
> +	mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
> +}
> +
> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
> +{
> +	del_timer_sync(timer);
> +}
> +
> +static inline void rtl8169_request_link_timer(struct net_device *dev)
> +{
> +	struct rtl8169_private *tp = netdev_priv(dev);
> +
> +	timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
> +}
> +
>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>  {
>  	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>  	return work_done;
>  }
>  
> -static void r8169_phylink_handler(struct net_device *ndev)
> -{
> -	struct rtl8169_private *tp = netdev_priv(ndev);
> -
> -	if (netif_carrier_ok(ndev)) {
> -		rtl_link_chg_patch(tp);
> -		pm_request_resume(&tp->pci_dev->dev);
> -	} else {
> -		pm_runtime_idle(&tp->pci_dev->dev);
> -	}
> -
> -	if (net_ratelimit())
> -		phy_print_status(tp->phydev);
> -}
> -
>  static int r8169_phy_connect(struct rtl8169_private *tp)
>  {
>  	struct phy_device *phydev = tp->phydev;
> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
>  		goto err_free_irq;
>  
>  	rtl8169_up(tp);
> +
> +	if (rtl_link_chng_polling_quirk(tp))
> +		mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
> +
>  	rtl8169_init_counter_offsets(tp);
>  	netif_start_queue(dev);
>  out:
> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
>  
>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
>  {
> -	tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
> +	tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
> +
> +	if (!rtl_link_chng_polling_quirk(tp))
> +		tp->irq_mask |= LinkChg;
>  
>  	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
>  		tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	if (pci_dev_run_wake(pdev))
>  		pm_runtime_put_sync(&pdev->dev);
>  
> +	if (rtl_link_chng_polling_quirk(tp))
> +		rtl8169_request_link_timer(dev);
> +
>  	return 0;
>  }
>  
> 

All this isn't needed. If you want to switch to link status polling,
why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL

Your timer-based code most likely would have problems if runtime pm
is enabled. Then you try to read the link status whilst NIC is in
D3hot.
kernel test robot June 3, 2021, 9:33 p.m. UTC | #2
Hi Koba,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on ipvs/master]
[also build test WARNING on linus/master v5.13-rc4 next-20210603]
[cannot apply to sparc-next/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Koba-Ko/r8169-introduce-polling-method-for-link-change/20210603-105524
base:   https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git master
config: mips-randconfig-r014-20210603 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project d8e0ae9a76a62bdc6117630d59bf9967ac9bb4ea)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mips-linux-gnu
        # https://github.com/0day-ci/linux/commit/99be97b442a155a21c4a7e4a34a98da1258105f6
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Koba-Ko/r8169-introduce-polling-method-for-link-change/20210603-105524
        git checkout 99be97b442a155a21c4a7e4a34a98da1258105f6
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/realtek/r8169_main.c:4674:16: warning: unused variable 'flags' [-Wunused-variable]
           unsigned long flags;
                         ^
>> drivers/net/ethernet/realtek/r8169_main.c:181:35: warning: unused variable 'rtl8169_linkChg_polling_enabled' [-Wunused-const-variable]
   static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
                                     ^
   drivers/net/ethernet/realtek/r8169_main.c:4684:20: warning: unused function 'rtl8169_delete_link_timer' [-Wunused-function]
   static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
                      ^
   3 warnings generated.


vim +/rtl8169_linkChg_polling_enabled +181 drivers/net/ethernet/realtek/r8169_main.c

   180	
 > 181	static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
   182		{ PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
   183		{ 0 }
   184	};
   185	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Koba Ko June 4, 2021, 7:22 a.m. UTC | #3
On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>
> On 03.06.2021 04:54, Koba Ko wrote:
> > For RTL8106E, it's a Fast-ethernet chip.
> > If ASPM is enabled, the link chang interrupt wouldn't be triggered
> > immediately and must wait a very long time to get link change interrupt.
> > Even the link change interrupt isn't triggered, the phy link is already
> > established.
> >
> At first please provide a full dmesg log and output of lspci -vv.
> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
> output.

please get the logs from here,
https://bugzilla.kernel.org/show_bug.cgi?id=213165

> Does the issue affect link-down and/or link-up detection?
> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
> D3hot and link-up detection triggers a PME.

Issue affect link-up.
yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
cable isn't present.

>
> > Introduce a polling method to watch the status of phy link and disable
> > the link change interrupt.
> > Also add a quirk for those realtek devices have the same issue.
> >
> Which are the affected chip versions? Did you check with Realtek?
> Your patch switches to polling for all Fast Ethernet versions,
> and that's not what we want.

I don't know the exact version, only the chip name 806e(pci device id 0x8165).
ok, Im asking Realtek to help how to identify the chip issue is observed.

>
> My suspicion would be that something is system-dependent. Else I think
> we would have seen such a report before.
On the mainline, the aspm is disable, so you may not observe this.
If you enable ASPM and must wait CHIP go to power-saving mode, then
you can observe the issue.
>
> > Signed-off-by: Koba Ko <koba.ko@canonical.com>
> > ---
> >  drivers/net/ethernet/realtek/r8169.h      |   2 +
> >  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
> >  2 files changed, 98 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
> > index 2728df46ec41..a8c71adb1b57 100644
> > --- a/drivers/net/ethernet/realtek/r8169.h
> > +++ b/drivers/net/ethernet/realtek/r8169.h
> > @@ -11,6 +11,8 @@
> >  #include <linux/types.h>
> >  #include <linux/phy.h>
> >
> > +#define RTL8169_LINK_TIMEOUT (1 * HZ)
> > +
> >  enum mac_version {
> >       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
> >       RTL_GIGA_MAC_VER_02,
> > diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> > index 2c89cde7da1e..70aacc83d641 100644
> > --- a/drivers/net/ethernet/realtek/r8169_main.c
> > +++ b/drivers/net/ethernet/realtek/r8169_main.c
> > @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
> >
> >  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
> >
> > +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
> > +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
> > +     { 0 }
> > +};
> > +
>
> This doesn't seem to be used.
>
> >  enum rtl_registers {
> >       MAC0            = 0,    /* Ethernet hardware address. */
> >       MAC4            = 4,
> > @@ -618,6 +623,7 @@ struct rtl8169_private {
> >       u16 cp_cmd;
> >       u32 irq_mask;
> >       struct clk *clk;
> > +     struct timer_list link_timer;
> >
> >       struct {
> >               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
> > @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
> >       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
> >  }
> >
> > +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
> > +{
> > +     struct pci_dev *pdev = tp->pci_dev;
> > +
> > +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
> > +             return 1;
> > +
> > +     return 0;
> > +}
> > +
> >  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
> >  {
> >       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
> > @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
> >       rtnl_unlock();
> >  }
> >
> > +static void r8169_phylink_handler(struct net_device *ndev)
> > +{
> > +     struct rtl8169_private *tp = netdev_priv(ndev);
> > +
> > +     if (netif_carrier_ok(ndev)) {
> > +             rtl_link_chg_patch(tp);
> > +             pm_request_resume(&tp->pci_dev->dev);
> > +     } else {
> > +             pm_runtime_idle(&tp->pci_dev->dev);
> > +     }
> > +
> > +     if (net_ratelimit())
> > +             phy_print_status(tp->phydev);
> > +}
> > +
> > +static unsigned int
> > +rtl8169_xmii_link_ok(struct net_device *dev)
> > +{
> > +     struct rtl8169_private *tp = netdev_priv(dev);
> > +     unsigned int retval;
> > +
> > +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
> > +
> > +     return retval;
> > +}
> > +
> > +static void
> > +rtl8169_check_link_status(struct net_device *dev)
> > +{
> > +     struct rtl8169_private *tp = netdev_priv(dev);
> > +     int link_status_on;
> > +
> > +     link_status_on = rtl8169_xmii_link_ok(dev);
> > +
> > +     if (netif_carrier_ok(dev) == link_status_on)
> > +             return;
> > +
> > +     phy_mac_interrupt(tp->phydev);
> > +
> > +     r8169_phylink_handler (dev);
> > +}
> > +
> > +static void rtl8169_link_timer(struct timer_list *t)
> > +{
> > +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
> > +     struct net_device *dev = tp->dev;
> > +     struct timer_list *timer = t;
> > +     unsigned long flags;
>
> flags isn't used and triggers a compiler warning. Did you even
> compile-test your patch?
>
> > +
> > +     rtl8169_check_link_status(dev);
> > +
> > +     if (timer_pending(&tp->link_timer))
> > +             return;
> > +
> > +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
> > +}
> > +
> > +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
> > +{
> > +     del_timer_sync(timer);
> > +}
> > +
> > +static inline void rtl8169_request_link_timer(struct net_device *dev)
> > +{
> > +     struct rtl8169_private *tp = netdev_priv(dev);
> > +
> > +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
> > +}
> > +
> >  static int rtl8169_poll(struct napi_struct *napi, int budget)
> >  {
> >       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> > @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
> >       return work_done;
> >  }
> >
> > -static void r8169_phylink_handler(struct net_device *ndev)
> > -{
> > -     struct rtl8169_private *tp = netdev_priv(ndev);
> > -
> > -     if (netif_carrier_ok(ndev)) {
> > -             rtl_link_chg_patch(tp);
> > -             pm_request_resume(&tp->pci_dev->dev);
> > -     } else {
> > -             pm_runtime_idle(&tp->pci_dev->dev);
> > -     }
> > -
> > -     if (net_ratelimit())
> > -             phy_print_status(tp->phydev);
> > -}
> > -
> >  static int r8169_phy_connect(struct rtl8169_private *tp)
> >  {
> >       struct phy_device *phydev = tp->phydev;
> > @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
> >               goto err_free_irq;
> >
> >       rtl8169_up(tp);
> > +
> > +     if (rtl_link_chng_polling_quirk(tp))
> > +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
> > +
> >       rtl8169_init_counter_offsets(tp);
> >       netif_start_queue(dev);
> >  out:
> > @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
> >
> >  static void rtl_set_irq_mask(struct rtl8169_private *tp)
> >  {
> > -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
> > +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
> > +
> > +     if (!rtl_link_chng_polling_quirk(tp))
> > +             tp->irq_mask |= LinkChg;
> >
> >       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
> >               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
> > @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> >       if (pci_dev_run_wake(pdev))
> >               pm_runtime_put_sync(&pdev->dev);
> >
> > +     if (rtl_link_chng_polling_quirk(tp))
> > +             rtl8169_request_link_timer(dev);
> > +
> >       return 0;
> >  }
> >
> >
>
> All this isn't needed. If you want to switch to link status polling,
> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL

Thanks for suggestions, I tried to use PHY_POLL, it could do the same
thing that I did.

> Your timer-based code most likely would have problems if runtime pm
> is enabled. Then you try to read the link status whilst NIC is in
> D3hot.
Heiner Kallweit June 4, 2021, 8:23 a.m. UTC | #4
On 04.06.2021 09:22, Koba Ko wrote:
> On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>
>> On 03.06.2021 04:54, Koba Ko wrote:
>>> For RTL8106E, it's a Fast-ethernet chip.
>>> If ASPM is enabled, the link chang interrupt wouldn't be triggered
>>> immediately and must wait a very long time to get link change interrupt.
>>> Even the link change interrupt isn't triggered, the phy link is already
>>> established.
>>>
>> At first please provide a full dmesg log and output of lspci -vv.
>> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
>> output.
> 
> please get the logs from here,
> https://bugzilla.kernel.org/show_bug.cgi?id=213165
> 
>> Does the issue affect link-down and/or link-up detection?
>> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
>> D3hot and link-up detection triggers a PME.
> 
> Issue affect link-up.
> yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
> cable isn't present.
> 
Then runtime pm doesn't seem to be set to "auto". Else 10s after link loss
the chip runtime-suspends and is set to D3hot.

>>
>>> Introduce a polling method to watch the status of phy link and disable
>>> the link change interrupt.
>>> Also add a quirk for those realtek devices have the same issue.
>>>
>> Which are the affected chip versions? Did you check with Realtek?
>> Your patch switches to polling for all Fast Ethernet versions,
>> and that's not what we want.
> 
> I don't know the exact version, only the chip name 806e(pci device id 0x8165).
> ok, Im asking Realtek to help how to identify the chip issue is observed.
> 
At least your Bugzilla report refers to VER_39. PCI device id 0x8136 is shared
by all fast ethernet chip versions.
Do you know other affected chip versions apart from VER_39 ?

In the Bugzilla report you also write the issue occurs with GBit-capable
link partners. This sounds more like an aneg problem.
The issue doesn't occur with fast ethernet link partners?

Your bug report also includes a patch that disables L1_1 only.
Not sure how this is related because the chip version we speak about
here doesn't support L1 sub-states.

>>
>> My suspicion would be that something is system-dependent. Else I think
>> we would have seen such a report before.
> On the mainline, the aspm is disable, so you may not observe this.
> If you enable ASPM and must wait CHIP go to power-saving mode, then
> you can observe the issue.
>>

So what you're saying is that mainline is fine and your problem is with
a downstream kernel with re-enabled ASPM? So there's nothing broken in
mainline? In mainline you have the option to re-enable ASPM states
individually via sysfs (link subdir at pci device).

>>> Signed-off-by: Koba Ko <koba.ko@canonical.com>
>>> ---
>>>  drivers/net/ethernet/realtek/r8169.h      |   2 +
>>>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
>>>  2 files changed, 98 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
>>> index 2728df46ec41..a8c71adb1b57 100644
>>> --- a/drivers/net/ethernet/realtek/r8169.h
>>> +++ b/drivers/net/ethernet/realtek/r8169.h
>>> @@ -11,6 +11,8 @@
>>>  #include <linux/types.h>
>>>  #include <linux/phy.h>
>>>
>>> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
>>> +
>>>  enum mac_version {
>>>       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
>>>       RTL_GIGA_MAC_VER_02,
>>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
>>> index 2c89cde7da1e..70aacc83d641 100644
>>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>>> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
>>>
>>>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
>>>
>>> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
>>> +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
>>> +     { 0 }
>>> +};
>>> +
>>
>> This doesn't seem to be used.
>>
>>>  enum rtl_registers {
>>>       MAC0            = 0,    /* Ethernet hardware address. */
>>>       MAC4            = 4,
>>> @@ -618,6 +623,7 @@ struct rtl8169_private {
>>>       u16 cp_cmd;
>>>       u32 irq_mask;
>>>       struct clk *clk;
>>> +     struct timer_list link_timer;
>>>
>>>       struct {
>>>               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
>>> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
>>>       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
>>>  }
>>>
>>> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
>>> +{
>>> +     struct pci_dev *pdev = tp->pci_dev;
>>> +
>>> +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
>>> +             return 1;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
>>>  {
>>>       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
>>> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
>>>       rtnl_unlock();
>>>  }
>>>
>>> +static void r8169_phylink_handler(struct net_device *ndev)
>>> +{
>>> +     struct rtl8169_private *tp = netdev_priv(ndev);
>>> +
>>> +     if (netif_carrier_ok(ndev)) {
>>> +             rtl_link_chg_patch(tp);
>>> +             pm_request_resume(&tp->pci_dev->dev);
>>> +     } else {
>>> +             pm_runtime_idle(&tp->pci_dev->dev);
>>> +     }
>>> +
>>> +     if (net_ratelimit())
>>> +             phy_print_status(tp->phydev);
>>> +}
>>> +
>>> +static unsigned int
>>> +rtl8169_xmii_link_ok(struct net_device *dev)
>>> +{
>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>> +     unsigned int retval;
>>> +
>>> +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
>>> +
>>> +     return retval;
>>> +}
>>> +
>>> +static void
>>> +rtl8169_check_link_status(struct net_device *dev)
>>> +{
>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>> +     int link_status_on;
>>> +
>>> +     link_status_on = rtl8169_xmii_link_ok(dev);
>>> +
>>> +     if (netif_carrier_ok(dev) == link_status_on)
>>> +             return;
>>> +
>>> +     phy_mac_interrupt(tp->phydev);
>>> +
>>> +     r8169_phylink_handler (dev);
>>> +}
>>> +
>>> +static void rtl8169_link_timer(struct timer_list *t)
>>> +{
>>> +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
>>> +     struct net_device *dev = tp->dev;
>>> +     struct timer_list *timer = t;
>>> +     unsigned long flags;
>>
>> flags isn't used and triggers a compiler warning. Did you even
>> compile-test your patch?
>>
>>> +
>>> +     rtl8169_check_link_status(dev);
>>> +
>>> +     if (timer_pending(&tp->link_timer))
>>> +             return;
>>> +
>>> +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
>>> +}
>>> +
>>> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
>>> +{
>>> +     del_timer_sync(timer);
>>> +}
>>> +
>>> +static inline void rtl8169_request_link_timer(struct net_device *dev)
>>> +{
>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>> +
>>> +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
>>> +}
>>> +
>>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>>>  {
>>>       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
>>> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>>>       return work_done;
>>>  }
>>>
>>> -static void r8169_phylink_handler(struct net_device *ndev)
>>> -{
>>> -     struct rtl8169_private *tp = netdev_priv(ndev);
>>> -
>>> -     if (netif_carrier_ok(ndev)) {
>>> -             rtl_link_chg_patch(tp);
>>> -             pm_request_resume(&tp->pci_dev->dev);
>>> -     } else {
>>> -             pm_runtime_idle(&tp->pci_dev->dev);
>>> -     }
>>> -
>>> -     if (net_ratelimit())
>>> -             phy_print_status(tp->phydev);
>>> -}
>>> -
>>>  static int r8169_phy_connect(struct rtl8169_private *tp)
>>>  {
>>>       struct phy_device *phydev = tp->phydev;
>>> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
>>>               goto err_free_irq;
>>>
>>>       rtl8169_up(tp);
>>> +
>>> +     if (rtl_link_chng_polling_quirk(tp))
>>> +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
>>> +
>>>       rtl8169_init_counter_offsets(tp);
>>>       netif_start_queue(dev);
>>>  out:
>>> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
>>>
>>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
>>>  {
>>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
>>> +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
>>> +
>>> +     if (!rtl_link_chng_polling_quirk(tp))
>>> +             tp->irq_mask |= LinkChg;
>>>
>>>       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
>>>               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
>>> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>>>       if (pci_dev_run_wake(pdev))
>>>               pm_runtime_put_sync(&pdev->dev);
>>>
>>> +     if (rtl_link_chng_polling_quirk(tp))
>>> +             rtl8169_request_link_timer(dev);
>>> +
>>>       return 0;
>>>  }
>>>
>>>
>>
>> All this isn't needed. If you want to switch to link status polling,
>> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL
> 
> Thanks for suggestions, I tried to use PHY_POLL, it could do the same
> thing that I did.
> 
>> Your timer-based code most likely would have problems if runtime pm
>> is enabled. Then you try to read the link status whilst NIC is in
>> D3hot.
Koba Ko June 4, 2021, 9:08 a.m. UTC | #5
On Fri, Jun 4, 2021 at 4:23 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>
> On 04.06.2021 09:22, Koba Ko wrote:
> > On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>
> >> On 03.06.2021 04:54, Koba Ko wrote:
> >>> For RTL8106E, it's a Fast-ethernet chip.
> >>> If ASPM is enabled, the link chang interrupt wouldn't be triggered
> >>> immediately and must wait a very long time to get link change interrupt.
> >>> Even the link change interrupt isn't triggered, the phy link is already
> >>> established.
> >>>
> >> At first please provide a full dmesg log and output of lspci -vv.
> >> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
> >> output.
> >
> > please get the logs from here,
> > https://bugzilla.kernel.org/show_bug.cgi?id=213165
> >
> >> Does the issue affect link-down and/or link-up detection?
> >> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
> >> D3hot and link-up detection triggers a PME.
> >
> > Issue affect link-up.
> > yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
> > cable isn't present.
> >
> Then runtime pm doesn't seem to be set to "auto". Else 10s after link loss
> the chip runtime-suspends and is set to D3hot.

I will check this.

>
> >>
> >>> Introduce a polling method to watch the status of phy link and disable
> >>> the link change interrupt.
> >>> Also add a quirk for those realtek devices have the same issue.
> >>>
> >> Which are the affected chip versions? Did you check with Realtek?
> >> Your patch switches to polling for all Fast Ethernet versions,
> >> and that's not what we want.
> >
> > I don't know the exact version, only the chip name 806e(pci device id 0x8165).
> > ok, Im asking Realtek to help how to identify the chip issue is observed.
> >
> At least your Bugzilla report refers to VER_39. PCI device id 0x8136 is shared
> by all fast ethernet chip versions.
> Do you know other affected chip versions apart from VER_39 ?
>
> In the Bugzilla report you also write the issue occurs with GBit-capable
> link partners. This sounds more like an aneg problem.
> The issue doesn't occur with fast ethernet link partners?

Issue wouldn't be observed when the link-partner has only FE capability.

>
> Your bug report also includes a patch that disables L1_1 only.
> Not sure how this is related because the chip version we speak about
> here doesn't support L1 sub-states.

I have tried to enable L0s, L1 and don't disable L1 substate,
but still get the issue that interrupt can't be fired immediately but
the Link status is up.

>
> >>
> >> My suspicion would be that something is system-dependent. Else I think
> >> we would have seen such a report before.
> > On the mainline, the aspm is disable, so you may not observe this.
> > If you enable ASPM and must wait CHIP go to power-saving mode, then
> > you can observe the issue.
> >>
>
> So what you're saying is that mainline is fine and your problem is with
> a downstream kernel with re-enabled ASPM? So there's nothing broken in
> mainline? In mainline you have the option to re-enable ASPM states
> individually via sysfs (link subdir at pci device).

If enable L1_1 on the mainline, the issue could be observed too.

Thanks
>
> >>> Signed-off-by: Koba Ko <koba.ko@canonical.com>
> >>> ---
> >>>  drivers/net/ethernet/realtek/r8169.h      |   2 +
> >>>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
> >>>  2 files changed, 98 insertions(+), 16 deletions(-)
> >>>
> >>> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
> >>> index 2728df46ec41..a8c71adb1b57 100644
> >>> --- a/drivers/net/ethernet/realtek/r8169.h
> >>> +++ b/drivers/net/ethernet/realtek/r8169.h
> >>> @@ -11,6 +11,8 @@
> >>>  #include <linux/types.h>
> >>>  #include <linux/phy.h>
> >>>
> >>> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
> >>> +
> >>>  enum mac_version {
> >>>       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
> >>>       RTL_GIGA_MAC_VER_02,
> >>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> >>> index 2c89cde7da1e..70aacc83d641 100644
> >>> --- a/drivers/net/ethernet/realtek/r8169_main.c
> >>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> >>> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
> >>>
> >>>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
> >>>
> >>> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
> >>> +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
> >>> +     { 0 }
> >>> +};
> >>> +
> >>
> >> This doesn't seem to be used.
> >>
> >>>  enum rtl_registers {
> >>>       MAC0            = 0,    /* Ethernet hardware address. */
> >>>       MAC4            = 4,
> >>> @@ -618,6 +623,7 @@ struct rtl8169_private {
> >>>       u16 cp_cmd;
> >>>       u32 irq_mask;
> >>>       struct clk *clk;
> >>> +     struct timer_list link_timer;
> >>>
> >>>       struct {
> >>>               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
> >>> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
> >>>       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
> >>>  }
> >>>
> >>> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
> >>> +{
> >>> +     struct pci_dev *pdev = tp->pci_dev;
> >>> +
> >>> +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
> >>> +             return 1;
> >>> +
> >>> +     return 0;
> >>> +}
> >>> +
> >>>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
> >>>  {
> >>>       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
> >>> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
> >>>       rtnl_unlock();
> >>>  }
> >>>
> >>> +static void r8169_phylink_handler(struct net_device *ndev)
> >>> +{
> >>> +     struct rtl8169_private *tp = netdev_priv(ndev);
> >>> +
> >>> +     if (netif_carrier_ok(ndev)) {
> >>> +             rtl_link_chg_patch(tp);
> >>> +             pm_request_resume(&tp->pci_dev->dev);
> >>> +     } else {
> >>> +             pm_runtime_idle(&tp->pci_dev->dev);
> >>> +     }
> >>> +
> >>> +     if (net_ratelimit())
> >>> +             phy_print_status(tp->phydev);
> >>> +}
> >>> +
> >>> +static unsigned int
> >>> +rtl8169_xmii_link_ok(struct net_device *dev)
> >>> +{
> >>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>> +     unsigned int retval;
> >>> +
> >>> +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
> >>> +
> >>> +     return retval;
> >>> +}
> >>> +
> >>> +static void
> >>> +rtl8169_check_link_status(struct net_device *dev)
> >>> +{
> >>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>> +     int link_status_on;
> >>> +
> >>> +     link_status_on = rtl8169_xmii_link_ok(dev);
> >>> +
> >>> +     if (netif_carrier_ok(dev) == link_status_on)
> >>> +             return;
> >>> +
> >>> +     phy_mac_interrupt(tp->phydev);
> >>> +
> >>> +     r8169_phylink_handler (dev);
> >>> +}
> >>> +
> >>> +static void rtl8169_link_timer(struct timer_list *t)
> >>> +{
> >>> +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
> >>> +     struct net_device *dev = tp->dev;
> >>> +     struct timer_list *timer = t;
> >>> +     unsigned long flags;
> >>
> >> flags isn't used and triggers a compiler warning. Did you even
> >> compile-test your patch?
> >>
> >>> +
> >>> +     rtl8169_check_link_status(dev);
> >>> +
> >>> +     if (timer_pending(&tp->link_timer))
> >>> +             return;
> >>> +
> >>> +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
> >>> +}
> >>> +
> >>> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
> >>> +{
> >>> +     del_timer_sync(timer);
> >>> +}
> >>> +
> >>> +static inline void rtl8169_request_link_timer(struct net_device *dev)
> >>> +{
> >>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>> +
> >>> +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
> >>> +}
> >>> +
> >>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
> >>>  {
> >>>       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> >>> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
> >>>       return work_done;
> >>>  }
> >>>
> >>> -static void r8169_phylink_handler(struct net_device *ndev)
> >>> -{
> >>> -     struct rtl8169_private *tp = netdev_priv(ndev);
> >>> -
> >>> -     if (netif_carrier_ok(ndev)) {
> >>> -             rtl_link_chg_patch(tp);
> >>> -             pm_request_resume(&tp->pci_dev->dev);
> >>> -     } else {
> >>> -             pm_runtime_idle(&tp->pci_dev->dev);
> >>> -     }
> >>> -
> >>> -     if (net_ratelimit())
> >>> -             phy_print_status(tp->phydev);
> >>> -}
> >>> -
> >>>  static int r8169_phy_connect(struct rtl8169_private *tp)
> >>>  {
> >>>       struct phy_device *phydev = tp->phydev;
> >>> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
> >>>               goto err_free_irq;
> >>>
> >>>       rtl8169_up(tp);
> >>> +
> >>> +     if (rtl_link_chng_polling_quirk(tp))
> >>> +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
> >>> +
> >>>       rtl8169_init_counter_offsets(tp);
> >>>       netif_start_queue(dev);
> >>>  out:
> >>> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
> >>>
> >>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
> >>>  {
> >>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
> >>> +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
> >>> +
> >>> +     if (!rtl_link_chng_polling_quirk(tp))
> >>> +             tp->irq_mask |= LinkChg;
> >>>
> >>>       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
> >>>               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
> >>> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> >>>       if (pci_dev_run_wake(pdev))
> >>>               pm_runtime_put_sync(&pdev->dev);
> >>>
> >>> +     if (rtl_link_chng_polling_quirk(tp))
> >>> +             rtl8169_request_link_timer(dev);
> >>> +
> >>>       return 0;
> >>>  }
> >>>
> >>>
> >>
> >> All this isn't needed. If you want to switch to link status polling,
> >> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL
> >
> > Thanks for suggestions, I tried to use PHY_POLL, it could do the same
> > thing that I did.
> >
> >> Your timer-based code most likely would have problems if runtime pm
> >> is enabled. Then you try to read the link status whilst NIC is in
> >> D3hot.
>
Heiner Kallweit June 4, 2021, 11:59 a.m. UTC | #6
On 04.06.2021 11:08, Koba Ko wrote:
> On Fri, Jun 4, 2021 at 4:23 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>
>> On 04.06.2021 09:22, Koba Ko wrote:
>>> On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>>>
>>>> On 03.06.2021 04:54, Koba Ko wrote:
>>>>> For RTL8106E, it's a Fast-ethernet chip.
>>>>> If ASPM is enabled, the link chang interrupt wouldn't be triggered
>>>>> immediately and must wait a very long time to get link change interrupt.
>>>>> Even the link change interrupt isn't triggered, the phy link is already
>>>>> established.
>>>>>
>>>> At first please provide a full dmesg log and output of lspci -vv.
>>>> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
>>>> output.
>>>
>>> please get the logs from here,
>>> https://bugzilla.kernel.org/show_bug.cgi?id=213165
>>>
>>>> Does the issue affect link-down and/or link-up detection?
>>>> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
>>>> D3hot and link-up detection triggers a PME.
>>>
>>> Issue affect link-up.
>>> yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
>>> cable isn't present.
>>>
>> Then runtime pm doesn't seem to be set to "auto". Else 10s after link loss
>> the chip runtime-suspends and is set to D3hot.
> 
> I will check this.
> 
>>
>>>>
>>>>> Introduce a polling method to watch the status of phy link and disable
>>>>> the link change interrupt.
>>>>> Also add a quirk for those realtek devices have the same issue.
>>>>>
>>>> Which are the affected chip versions? Did you check with Realtek?
>>>> Your patch switches to polling for all Fast Ethernet versions,
>>>> and that's not what we want.
>>>
>>> I don't know the exact version, only the chip name 806e(pci device id 0x8165).
>>> ok, Im asking Realtek to help how to identify the chip issue is observed.
>>>
>> At least your Bugzilla report refers to VER_39. PCI device id 0x8136 is shared
>> by all fast ethernet chip versions.
>> Do you know other affected chip versions apart from VER_39 ?
>>
>> In the Bugzilla report you also write the issue occurs with GBit-capable
>> link partners. This sounds more like an aneg problem.
>> The issue doesn't occur with fast ethernet link partners?
> 
> Issue wouldn't be observed when the link-partner has only FE capability.
> 
Weird. I still have no clue how FE vs. GE support at link partner and
ASPM could be related. I could understand that the PHY might have a
problem with a GE link partner and aneg takes more time than usual.
But this would be completely unrelated to a potential issue with
ASPM on the PCIe link.

And it's also not clear how L1_1 can cause an issue if the NIC doesn't
support L1 sub-states. Maybe the root cause isn't with the NIC but
with some other component in the PCIe path (e.g. bridge).

>>
>> Your bug report also includes a patch that disables L1_1 only.
>> Not sure how this is related because the chip version we speak about
>> here doesn't support L1 sub-states.
> 
> I have tried to enable L0s, L1 and don't disable L1 substate,
> but still get the issue that interrupt can't be fired immediately but
> the Link status is up.
> 
>>
>>>>
>>>> My suspicion would be that something is system-dependent. Else I think
>>>> we would have seen such a report before.
>>> On the mainline, the aspm is disable, so you may not observe this.
>>> If you enable ASPM and must wait CHIP go to power-saving mode, then
>>> you can observe the issue.
>>>>
>>
>> So what you're saying is that mainline is fine and your problem is with
>> a downstream kernel with re-enabled ASPM? So there's nothing broken in
>> mainline? In mainline you have the option to re-enable ASPM states
>> individually via sysfs (link subdir at pci device).
> 
> If enable L1_1 on the mainline, the issue could be observed too.
> 
It has a reason that ASPM is disabled per default in mainline. Different
chip versions have different types of issues with ASPM enabled.
However several chip versions work fine with ASPM (also LI sub-states),
therefore users can re-enable ASPM states at own risk.

> Thanks
>>
>>>>> Signed-off-by: Koba Ko <koba.ko@canonical.com>
>>>>> ---
>>>>>  drivers/net/ethernet/realtek/r8169.h      |   2 +
>>>>>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
>>>>>  2 files changed, 98 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
>>>>> index 2728df46ec41..a8c71adb1b57 100644
>>>>> --- a/drivers/net/ethernet/realtek/r8169.h
>>>>> +++ b/drivers/net/ethernet/realtek/r8169.h
>>>>> @@ -11,6 +11,8 @@
>>>>>  #include <linux/types.h>
>>>>>  #include <linux/phy.h>
>>>>>
>>>>> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
>>>>> +
>>>>>  enum mac_version {
>>>>>       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
>>>>>       RTL_GIGA_MAC_VER_02,
>>>>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
>>>>> index 2c89cde7da1e..70aacc83d641 100644
>>>>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>>>>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>>>>> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
>>>>>
>>>>>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
>>>>>
>>>>> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
>>>>> +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
>>>>> +     { 0 }
>>>>> +};
>>>>> +
>>>>
>>>> This doesn't seem to be used.
>>>>
>>>>>  enum rtl_registers {
>>>>>       MAC0            = 0,    /* Ethernet hardware address. */
>>>>>       MAC4            = 4,
>>>>> @@ -618,6 +623,7 @@ struct rtl8169_private {
>>>>>       u16 cp_cmd;
>>>>>       u32 irq_mask;
>>>>>       struct clk *clk;
>>>>> +     struct timer_list link_timer;
>>>>>
>>>>>       struct {
>>>>>               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
>>>>> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
>>>>>       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
>>>>>  }
>>>>>
>>>>> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
>>>>> +{
>>>>> +     struct pci_dev *pdev = tp->pci_dev;
>>>>> +
>>>>> +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
>>>>> +             return 1;
>>>>> +
>>>>> +     return 0;
>>>>> +}
>>>>> +
>>>>>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
>>>>>  {
>>>>>       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
>>>>> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
>>>>>       rtnl_unlock();
>>>>>  }
>>>>>
>>>>> +static void r8169_phylink_handler(struct net_device *ndev)
>>>>> +{
>>>>> +     struct rtl8169_private *tp = netdev_priv(ndev);
>>>>> +
>>>>> +     if (netif_carrier_ok(ndev)) {
>>>>> +             rtl_link_chg_patch(tp);
>>>>> +             pm_request_resume(&tp->pci_dev->dev);
>>>>> +     } else {
>>>>> +             pm_runtime_idle(&tp->pci_dev->dev);
>>>>> +     }
>>>>> +
>>>>> +     if (net_ratelimit())
>>>>> +             phy_print_status(tp->phydev);
>>>>> +}
>>>>> +
>>>>> +static unsigned int
>>>>> +rtl8169_xmii_link_ok(struct net_device *dev)
>>>>> +{
>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>>>> +     unsigned int retval;
>>>>> +
>>>>> +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
>>>>> +
>>>>> +     return retval;
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +rtl8169_check_link_status(struct net_device *dev)
>>>>> +{
>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>>>> +     int link_status_on;
>>>>> +
>>>>> +     link_status_on = rtl8169_xmii_link_ok(dev);
>>>>> +
>>>>> +     if (netif_carrier_ok(dev) == link_status_on)
>>>>> +             return;
>>>>> +
>>>>> +     phy_mac_interrupt(tp->phydev);
>>>>> +
>>>>> +     r8169_phylink_handler (dev);
>>>>> +}
>>>>> +
>>>>> +static void rtl8169_link_timer(struct timer_list *t)
>>>>> +{
>>>>> +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
>>>>> +     struct net_device *dev = tp->dev;
>>>>> +     struct timer_list *timer = t;
>>>>> +     unsigned long flags;
>>>>
>>>> flags isn't used and triggers a compiler warning. Did you even
>>>> compile-test your patch?
>>>>
>>>>> +
>>>>> +     rtl8169_check_link_status(dev);
>>>>> +
>>>>> +     if (timer_pending(&tp->link_timer))
>>>>> +             return;
>>>>> +
>>>>> +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
>>>>> +}
>>>>> +
>>>>> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
>>>>> +{
>>>>> +     del_timer_sync(timer);
>>>>> +}
>>>>> +
>>>>> +static inline void rtl8169_request_link_timer(struct net_device *dev)
>>>>> +{
>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>>>> +
>>>>> +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
>>>>> +}
>>>>> +
>>>>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>>>>>  {
>>>>>       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
>>>>> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>>>>>       return work_done;
>>>>>  }
>>>>>
>>>>> -static void r8169_phylink_handler(struct net_device *ndev)
>>>>> -{
>>>>> -     struct rtl8169_private *tp = netdev_priv(ndev);
>>>>> -
>>>>> -     if (netif_carrier_ok(ndev)) {
>>>>> -             rtl_link_chg_patch(tp);
>>>>> -             pm_request_resume(&tp->pci_dev->dev);
>>>>> -     } else {
>>>>> -             pm_runtime_idle(&tp->pci_dev->dev);
>>>>> -     }
>>>>> -
>>>>> -     if (net_ratelimit())
>>>>> -             phy_print_status(tp->phydev);
>>>>> -}
>>>>> -
>>>>>  static int r8169_phy_connect(struct rtl8169_private *tp)
>>>>>  {
>>>>>       struct phy_device *phydev = tp->phydev;
>>>>> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
>>>>>               goto err_free_irq;
>>>>>
>>>>>       rtl8169_up(tp);
>>>>> +
>>>>> +     if (rtl_link_chng_polling_quirk(tp))
>>>>> +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
>>>>> +
>>>>>       rtl8169_init_counter_offsets(tp);
>>>>>       netif_start_queue(dev);
>>>>>  out:
>>>>> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
>>>>>
>>>>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
>>>>>  {
>>>>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
>>>>> +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
>>>>> +
>>>>> +     if (!rtl_link_chng_polling_quirk(tp))
>>>>> +             tp->irq_mask |= LinkChg;
>>>>>
>>>>>       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
>>>>>               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
>>>>> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>>>>>       if (pci_dev_run_wake(pdev))
>>>>>               pm_runtime_put_sync(&pdev->dev);
>>>>>
>>>>> +     if (rtl_link_chng_polling_quirk(tp))
>>>>> +             rtl8169_request_link_timer(dev);
>>>>> +
>>>>>       return 0;
>>>>>  }
>>>>>
>>>>>
>>>>
>>>> All this isn't needed. If you want to switch to link status polling,
>>>> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL
>>>
>>> Thanks for suggestions, I tried to use PHY_POLL, it could do the same
>>> thing that I did.
>>>
>>>> Your timer-based code most likely would have problems if runtime pm
>>>> is enabled. Then you try to read the link status whilst NIC is in
>>>> D3hot.
>>
Koba Ko June 7, 2021, 4:34 a.m. UTC | #7
On Fri, Jun 4, 2021 at 7:59 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>
> On 04.06.2021 11:08, Koba Ko wrote:
> > On Fri, Jun 4, 2021 at 4:23 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>
> >> On 04.06.2021 09:22, Koba Ko wrote:
> >>> On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>>>
> >>>> On 03.06.2021 04:54, Koba Ko wrote:
> >>>>> For RTL8106E, it's a Fast-ethernet chip.
> >>>>> If ASPM is enabled, the link chang interrupt wouldn't be triggered
> >>>>> immediately and must wait a very long time to get link change interrupt.
> >>>>> Even the link change interrupt isn't triggered, the phy link is already
> >>>>> established.
> >>>>>
> >>>> At first please provide a full dmesg log and output of lspci -vv.
> >>>> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
> >>>> output.
> >>>
> >>> please get the logs from here,
> >>> https://bugzilla.kernel.org/show_bug.cgi?id=213165
> >>>
> >>>> Does the issue affect link-down and/or link-up detection?
> >>>> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
> >>>> D3hot and link-up detection triggers a PME.
> >>>
> >>> Issue affect link-up.
> >>> yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
> >>> cable isn't present.
> >>>
> >> Then runtime pm doesn't seem to be set to "auto". Else 10s after link loss
> >> the chip runtime-suspends and is set to D3hot.
> >
> > I will check this.
> >
> >>
> >>>>
> >>>>> Introduce a polling method to watch the status of phy link and disable
> >>>>> the link change interrupt.
> >>>>> Also add a quirk for those realtek devices have the same issue.
> >>>>>
> >>>> Which are the affected chip versions? Did you check with Realtek?
> >>>> Your patch switches to polling for all Fast Ethernet versions,
> >>>> and that's not what we want.
> >>>
> >>> I don't know the exact version, only the chip name 806e(pci device id 0x8165).
> >>> ok, Im asking Realtek to help how to identify the chip issue is observed.
> >>>
> >> At least your Bugzilla report refers to VER_39. PCI device id 0x8136 is shared
> >> by all fast ethernet chip versions.
> >> Do you know other affected chip versions apart from VER_39 ?
> >>
> >> In the Bugzilla report you also write the issue occurs with GBit-capable
> >> link partners. This sounds more like an aneg problem.
> >> The issue doesn't occur with fast ethernet link partners?
> >
> > Issue wouldn't be observed when the link-partner has only FE capability.
> >
> Weird. I still have no clue how FE vs. GE support at link partner and
> ASPM could be related. I could understand that the PHY might have a
> problem with a GE link partner and aneg takes more time than usual.
> But this would be completely unrelated to a potential issue with
> ASPM on the PCIe link.
>
> And it's also not clear how L1_1 can cause an issue if the NIC doesn't
> support L1 sub-states. Maybe the root cause isn't with the NIC but
> with some other component in the PCIe path (e.g. bridge).
>

I prefer that there's a interrupt issue when aspm is enabled on RTL8106e,

> >>
> >> Your bug report also includes a patch that disables L1_1 only.
> >> Not sure how this is related because the chip version we speak about
> >> here doesn't support L1 sub-states.
> >
> > I have tried to enable L0s, L1 and don't disable L1 substate,
> > but still get the issue that interrupt can't be fired immediately but
> > the Link status is up.
> >
> >>
> >>>>
> >>>> My suspicion would be that something is system-dependent. Else I think
> >>>> we would have seen such a report before.
> >>> On the mainline, the aspm is disable, so you may not observe this.
> >>> If you enable ASPM and must wait CHIP go to power-saving mode, then
> >>> you can observe the issue.
> >>>>
> >>
> >> So what you're saying is that mainline is fine and your problem is with
> >> a downstream kernel with re-enabled ASPM? So there's nothing broken in
> >> mainline? In mainline you have the option to re-enable ASPM states
> >> individually via sysfs (link subdir at pci device).
> >
> > If enable L1_1 on the mainline, the issue could be observed too.
> >
> It has a reason that ASPM is disabled per default in mainline. Different
> chip versions have different types of issues with ASPM enabled.
> However several chip versions work fine with ASPM (also LI sub-states),
> therefore users can re-enable ASPM states at own risk.

After consulting with REALTEK, I can identify RTL8106e by PCI_VENDOR
REALTEK, DEVICE 0x8136, Revision 0x7.
I would like to make PHY_POLL as default for RTL8106E on V2.
because there's no side effects besides the cpu usage rate would be a
little higher,
How do you think?

> > Thanks
> >>
> >>>>> Signed-off-by: Koba Ko <koba.ko@canonical.com>
> >>>>> ---
> >>>>>  drivers/net/ethernet/realtek/r8169.h      |   2 +
> >>>>>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
> >>>>>  2 files changed, 98 insertions(+), 16 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
> >>>>> index 2728df46ec41..a8c71adb1b57 100644
> >>>>> --- a/drivers/net/ethernet/realtek/r8169.h
> >>>>> +++ b/drivers/net/ethernet/realtek/r8169.h
> >>>>> @@ -11,6 +11,8 @@
> >>>>>  #include <linux/types.h>
> >>>>>  #include <linux/phy.h>
> >>>>>
> >>>>> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
> >>>>> +
> >>>>>  enum mac_version {
> >>>>>       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
> >>>>>       RTL_GIGA_MAC_VER_02,
> >>>>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> >>>>> index 2c89cde7da1e..70aacc83d641 100644
> >>>>> --- a/drivers/net/ethernet/realtek/r8169_main.c
> >>>>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> >>>>> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
> >>>>>
> >>>>>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
> >>>>>
> >>>>> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
> >>>>> +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
> >>>>> +     { 0 }
> >>>>> +};
> >>>>> +
> >>>>
> >>>> This doesn't seem to be used.
> >>>>
> >>>>>  enum rtl_registers {
> >>>>>       MAC0            = 0,    /* Ethernet hardware address. */
> >>>>>       MAC4            = 4,
> >>>>> @@ -618,6 +623,7 @@ struct rtl8169_private {
> >>>>>       u16 cp_cmd;
> >>>>>       u32 irq_mask;
> >>>>>       struct clk *clk;
> >>>>> +     struct timer_list link_timer;
> >>>>>
> >>>>>       struct {
> >>>>>               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
> >>>>> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
> >>>>>       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
> >>>>>  }
> >>>>>
> >>>>> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
> >>>>> +{
> >>>>> +     struct pci_dev *pdev = tp->pci_dev;
> >>>>> +
> >>>>> +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
> >>>>> +             return 1;
> >>>>> +
> >>>>> +     return 0;
> >>>>> +}
> >>>>> +
> >>>>>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
> >>>>>  {
> >>>>>       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
> >>>>> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
> >>>>>       rtnl_unlock();
> >>>>>  }
> >>>>>
> >>>>> +static void r8169_phylink_handler(struct net_device *ndev)
> >>>>> +{
> >>>>> +     struct rtl8169_private *tp = netdev_priv(ndev);
> >>>>> +
> >>>>> +     if (netif_carrier_ok(ndev)) {
> >>>>> +             rtl_link_chg_patch(tp);
> >>>>> +             pm_request_resume(&tp->pci_dev->dev);
> >>>>> +     } else {
> >>>>> +             pm_runtime_idle(&tp->pci_dev->dev);
> >>>>> +     }
> >>>>> +
> >>>>> +     if (net_ratelimit())
> >>>>> +             phy_print_status(tp->phydev);
> >>>>> +}
> >>>>> +
> >>>>> +static unsigned int
> >>>>> +rtl8169_xmii_link_ok(struct net_device *dev)
> >>>>> +{
> >>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>>>> +     unsigned int retval;
> >>>>> +
> >>>>> +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
> >>>>> +
> >>>>> +     return retval;
> >>>>> +}
> >>>>> +
> >>>>> +static void
> >>>>> +rtl8169_check_link_status(struct net_device *dev)
> >>>>> +{
> >>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>>>> +     int link_status_on;
> >>>>> +
> >>>>> +     link_status_on = rtl8169_xmii_link_ok(dev);
> >>>>> +
> >>>>> +     if (netif_carrier_ok(dev) == link_status_on)
> >>>>> +             return;
> >>>>> +
> >>>>> +     phy_mac_interrupt(tp->phydev);
> >>>>> +
> >>>>> +     r8169_phylink_handler (dev);
> >>>>> +}
> >>>>> +
> >>>>> +static void rtl8169_link_timer(struct timer_list *t)
> >>>>> +{
> >>>>> +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
> >>>>> +     struct net_device *dev = tp->dev;
> >>>>> +     struct timer_list *timer = t;
> >>>>> +     unsigned long flags;
> >>>>
> >>>> flags isn't used and triggers a compiler warning. Did you even
> >>>> compile-test your patch?
> >>>>
> >>>>> +
> >>>>> +     rtl8169_check_link_status(dev);
> >>>>> +
> >>>>> +     if (timer_pending(&tp->link_timer))
> >>>>> +             return;
> >>>>> +
> >>>>> +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
> >>>>> +}
> >>>>> +
> >>>>> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
> >>>>> +{
> >>>>> +     del_timer_sync(timer);
> >>>>> +}
> >>>>> +
> >>>>> +static inline void rtl8169_request_link_timer(struct net_device *dev)
> >>>>> +{
> >>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>>>> +
> >>>>> +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
> >>>>> +}
> >>>>> +
> >>>>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
> >>>>>  {
> >>>>>       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> >>>>> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
> >>>>>       return work_done;
> >>>>>  }
> >>>>>
> >>>>> -static void r8169_phylink_handler(struct net_device *ndev)
> >>>>> -{
> >>>>> -     struct rtl8169_private *tp = netdev_priv(ndev);
> >>>>> -
> >>>>> -     if (netif_carrier_ok(ndev)) {
> >>>>> -             rtl_link_chg_patch(tp);
> >>>>> -             pm_request_resume(&tp->pci_dev->dev);
> >>>>> -     } else {
> >>>>> -             pm_runtime_idle(&tp->pci_dev->dev);
> >>>>> -     }
> >>>>> -
> >>>>> -     if (net_ratelimit())
> >>>>> -             phy_print_status(tp->phydev);
> >>>>> -}
> >>>>> -
> >>>>>  static int r8169_phy_connect(struct rtl8169_private *tp)
> >>>>>  {
> >>>>>       struct phy_device *phydev = tp->phydev;
> >>>>> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
> >>>>>               goto err_free_irq;
> >>>>>
> >>>>>       rtl8169_up(tp);
> >>>>> +
> >>>>> +     if (rtl_link_chng_polling_quirk(tp))
> >>>>> +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
> >>>>> +
> >>>>>       rtl8169_init_counter_offsets(tp);
> >>>>>       netif_start_queue(dev);
> >>>>>  out:
> >>>>> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
> >>>>>
> >>>>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
> >>>>>  {
> >>>>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
> >>>>> +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
> >>>>> +
> >>>>> +     if (!rtl_link_chng_polling_quirk(tp))
> >>>>> +             tp->irq_mask |= LinkChg;
> >>>>>
> >>>>>       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
> >>>>>               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
> >>>>> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> >>>>>       if (pci_dev_run_wake(pdev))
> >>>>>               pm_runtime_put_sync(&pdev->dev);
> >>>>>
> >>>>> +     if (rtl_link_chng_polling_quirk(tp))
> >>>>> +             rtl8169_request_link_timer(dev);
> >>>>> +
> >>>>>       return 0;
> >>>>>  }
> >>>>>
> >>>>>
> >>>>
> >>>> All this isn't needed. If you want to switch to link status polling,
> >>>> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL
> >>>
> >>> Thanks for suggestions, I tried to use PHY_POLL, it could do the same
> >>> thing that I did.
> >>>
> >>>> Your timer-based code most likely would have problems if runtime pm
> >>>> is enabled. Then you try to read the link status whilst NIC is in
> >>>> D3hot.
> >>
>
Heiner Kallweit June 7, 2021, 10:43 a.m. UTC | #8
On 07.06.2021 06:34, Koba Ko wrote:
> On Fri, Jun 4, 2021 at 7:59 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>
>> On 04.06.2021 11:08, Koba Ko wrote:
>>> On Fri, Jun 4, 2021 at 4:23 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>>>
>>>> On 04.06.2021 09:22, Koba Ko wrote:
>>>>> On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>>>>>>
>>>>>> On 03.06.2021 04:54, Koba Ko wrote:
>>>>>>> For RTL8106E, it's a Fast-ethernet chip.
>>>>>>> If ASPM is enabled, the link chang interrupt wouldn't be triggered
>>>>>>> immediately and must wait a very long time to get link change interrupt.
>>>>>>> Even the link change interrupt isn't triggered, the phy link is already
>>>>>>> established.
>>>>>>>
>>>>>> At first please provide a full dmesg log and output of lspci -vv.
>>>>>> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
>>>>>> output.
>>>>>
>>>>> please get the logs from here,
>>>>> https://bugzilla.kernel.org/show_bug.cgi?id=213165
>>>>>
>>>>>> Does the issue affect link-down and/or link-up detection?
>>>>>> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
>>>>>> D3hot and link-up detection triggers a PME.
>>>>>
>>>>> Issue affect link-up.
>>>>> yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
>>>>> cable isn't present.
>>>>>
>>>> Then runtime pm doesn't seem to be set to "auto". Else 10s after link loss
>>>> the chip runtime-suspends and is set to D3hot.
>>>
>>> I will check this.
>>>
>>>>
>>>>>>
>>>>>>> Introduce a polling method to watch the status of phy link and disable
>>>>>>> the link change interrupt.
>>>>>>> Also add a quirk for those realtek devices have the same issue.
>>>>>>>
>>>>>> Which are the affected chip versions? Did you check with Realtek?
>>>>>> Your patch switches to polling for all Fast Ethernet versions,
>>>>>> and that's not what we want.
>>>>>
>>>>> I don't know the exact version, only the chip name 806e(pci device id 0x8165).
>>>>> ok, Im asking Realtek to help how to identify the chip issue is observed.
>>>>>
>>>> At least your Bugzilla report refers to VER_39. PCI device id 0x8136 is shared
>>>> by all fast ethernet chip versions.
>>>> Do you know other affected chip versions apart from VER_39 ?
>>>>
>>>> In the Bugzilla report you also write the issue occurs with GBit-capable
>>>> link partners. This sounds more like an aneg problem.
>>>> The issue doesn't occur with fast ethernet link partners?
>>>
>>> Issue wouldn't be observed when the link-partner has only FE capability.
>>>
>> Weird. I still have no clue how FE vs. GE support at link partner and
>> ASPM could be related. I could understand that the PHY might have a
>> problem with a GE link partner and aneg takes more time than usual.
>> But this would be completely unrelated to a potential issue with
>> ASPM on the PCIe link.
>>
>> And it's also not clear how L1_1 can cause an issue if the NIC doesn't
>> support L1 sub-states. Maybe the root cause isn't with the NIC but
>> with some other component in the PCIe path (e.g. bridge).
>>
> 
> I prefer that there's a interrupt issue when aspm is enabled on RTL8106e,
> 
>>>>
>>>> Your bug report also includes a patch that disables L1_1 only.
>>>> Not sure how this is related because the chip version we speak about
>>>> here doesn't support L1 sub-states.
>>>
>>> I have tried to enable L0s, L1 and don't disable L1 substate,
>>> but still get the issue that interrupt can't be fired immediately but
>>> the Link status is up.
>>>
>>>>
>>>>>>
>>>>>> My suspicion would be that something is system-dependent. Else I think
>>>>>> we would have seen such a report before.
>>>>> On the mainline, the aspm is disable, so you may not observe this.
>>>>> If you enable ASPM and must wait CHIP go to power-saving mode, then
>>>>> you can observe the issue.
>>>>>>
>>>>
>>>> So what you're saying is that mainline is fine and your problem is with
>>>> a downstream kernel with re-enabled ASPM? So there's nothing broken in
>>>> mainline? In mainline you have the option to re-enable ASPM states
>>>> individually via sysfs (link subdir at pci device).
>>>
>>> If enable L1_1 on the mainline, the issue could be observed too.
>>>
>> It has a reason that ASPM is disabled per default in mainline. Different
>> chip versions have different types of issues with ASPM enabled.
>> However several chip versions work fine with ASPM (also LI sub-states),
>> therefore users can re-enable ASPM states at own risk.
> 
> After consulting with REALTEK, I can identify RTL8106e by PCI_VENDOR
> REALTEK, DEVICE 0x8136, Revision 0x7.

This wasn't the question. The identification is available already.
This chip version is RTL_GIGA_MAC_VER_39.

> I would like to make PHY_POLL as default for RTL8106E on V2.
> because there's no side effects besides the cpu usage rate would be a
> little higher,
> How do you think?
> 
Did you check the actual question with Realtek? Did they confirm a hw
issue with this chip version? If yes, some more details would be helpful.

The dmesg log attached to your linked bugzilla issue lists a number of
ACPI errors. Overall I'm still not convinced that root cause of your
issue is a NIC hw bug.

>>> Thanks
>>>>
>>>>>>> Signed-off-by: Koba Ko <koba.ko@canonical.com>
>>>>>>> ---
>>>>>>>  drivers/net/ethernet/realtek/r8169.h      |   2 +
>>>>>>>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
>>>>>>>  2 files changed, 98 insertions(+), 16 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
>>>>>>> index 2728df46ec41..a8c71adb1b57 100644
>>>>>>> --- a/drivers/net/ethernet/realtek/r8169.h
>>>>>>> +++ b/drivers/net/ethernet/realtek/r8169.h
>>>>>>> @@ -11,6 +11,8 @@
>>>>>>>  #include <linux/types.h>
>>>>>>>  #include <linux/phy.h>
>>>>>>>
>>>>>>> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
>>>>>>> +
>>>>>>>  enum mac_version {
>>>>>>>       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
>>>>>>>       RTL_GIGA_MAC_VER_02,
>>>>>>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
>>>>>>> index 2c89cde7da1e..70aacc83d641 100644
>>>>>>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>>>>>>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>>>>>>> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
>>>>>>>
>>>>>>>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
>>>>>>>
>>>>>>> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
>>>>>>> +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
>>>>>>> +     { 0 }
>>>>>>> +};
>>>>>>> +
>>>>>>
>>>>>> This doesn't seem to be used.
>>>>>>
>>>>>>>  enum rtl_registers {
>>>>>>>       MAC0            = 0,    /* Ethernet hardware address. */
>>>>>>>       MAC4            = 4,
>>>>>>> @@ -618,6 +623,7 @@ struct rtl8169_private {
>>>>>>>       u16 cp_cmd;
>>>>>>>       u32 irq_mask;
>>>>>>>       struct clk *clk;
>>>>>>> +     struct timer_list link_timer;
>>>>>>>
>>>>>>>       struct {
>>>>>>>               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
>>>>>>> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
>>>>>>>       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
>>>>>>>  }
>>>>>>>
>>>>>>> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
>>>>>>> +{
>>>>>>> +     struct pci_dev *pdev = tp->pci_dev;
>>>>>>> +
>>>>>>> +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
>>>>>>> +             return 1;
>>>>>>> +
>>>>>>> +     return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
>>>>>>>  {
>>>>>>>       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
>>>>>>> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
>>>>>>>       rtnl_unlock();
>>>>>>>  }
>>>>>>>
>>>>>>> +static void r8169_phylink_handler(struct net_device *ndev)
>>>>>>> +{
>>>>>>> +     struct rtl8169_private *tp = netdev_priv(ndev);
>>>>>>> +
>>>>>>> +     if (netif_carrier_ok(ndev)) {
>>>>>>> +             rtl_link_chg_patch(tp);
>>>>>>> +             pm_request_resume(&tp->pci_dev->dev);
>>>>>>> +     } else {
>>>>>>> +             pm_runtime_idle(&tp->pci_dev->dev);
>>>>>>> +     }
>>>>>>> +
>>>>>>> +     if (net_ratelimit())
>>>>>>> +             phy_print_status(tp->phydev);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static unsigned int
>>>>>>> +rtl8169_xmii_link_ok(struct net_device *dev)
>>>>>>> +{
>>>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>>>>>> +     unsigned int retval;
>>>>>>> +
>>>>>>> +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
>>>>>>> +
>>>>>>> +     return retval;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +rtl8169_check_link_status(struct net_device *dev)
>>>>>>> +{
>>>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>>>>>> +     int link_status_on;
>>>>>>> +
>>>>>>> +     link_status_on = rtl8169_xmii_link_ok(dev);
>>>>>>> +
>>>>>>> +     if (netif_carrier_ok(dev) == link_status_on)
>>>>>>> +             return;
>>>>>>> +
>>>>>>> +     phy_mac_interrupt(tp->phydev);
>>>>>>> +
>>>>>>> +     r8169_phylink_handler (dev);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void rtl8169_link_timer(struct timer_list *t)
>>>>>>> +{
>>>>>>> +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
>>>>>>> +     struct net_device *dev = tp->dev;
>>>>>>> +     struct timer_list *timer = t;
>>>>>>> +     unsigned long flags;
>>>>>>
>>>>>> flags isn't used and triggers a compiler warning. Did you even
>>>>>> compile-test your patch?
>>>>>>
>>>>>>> +
>>>>>>> +     rtl8169_check_link_status(dev);
>>>>>>> +
>>>>>>> +     if (timer_pending(&tp->link_timer))
>>>>>>> +             return;
>>>>>>> +
>>>>>>> +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
>>>>>>> +{
>>>>>>> +     del_timer_sync(timer);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static inline void rtl8169_request_link_timer(struct net_device *dev)
>>>>>>> +{
>>>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
>>>>>>> +
>>>>>>> +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
>>>>>>> +}
>>>>>>> +
>>>>>>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
>>>>>>>  {
>>>>>>>       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
>>>>>>> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>>>>>>>       return work_done;
>>>>>>>  }
>>>>>>>
>>>>>>> -static void r8169_phylink_handler(struct net_device *ndev)
>>>>>>> -{
>>>>>>> -     struct rtl8169_private *tp = netdev_priv(ndev);
>>>>>>> -
>>>>>>> -     if (netif_carrier_ok(ndev)) {
>>>>>>> -             rtl_link_chg_patch(tp);
>>>>>>> -             pm_request_resume(&tp->pci_dev->dev);
>>>>>>> -     } else {
>>>>>>> -             pm_runtime_idle(&tp->pci_dev->dev);
>>>>>>> -     }
>>>>>>> -
>>>>>>> -     if (net_ratelimit())
>>>>>>> -             phy_print_status(tp->phydev);
>>>>>>> -}
>>>>>>> -
>>>>>>>  static int r8169_phy_connect(struct rtl8169_private *tp)
>>>>>>>  {
>>>>>>>       struct phy_device *phydev = tp->phydev;
>>>>>>> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
>>>>>>>               goto err_free_irq;
>>>>>>>
>>>>>>>       rtl8169_up(tp);
>>>>>>> +
>>>>>>> +     if (rtl_link_chng_polling_quirk(tp))
>>>>>>> +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
>>>>>>> +
>>>>>>>       rtl8169_init_counter_offsets(tp);
>>>>>>>       netif_start_queue(dev);
>>>>>>>  out:
>>>>>>> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
>>>>>>>
>>>>>>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
>>>>>>>  {
>>>>>>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
>>>>>>> +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
>>>>>>> +
>>>>>>> +     if (!rtl_link_chng_polling_quirk(tp))
>>>>>>> +             tp->irq_mask |= LinkChg;
>>>>>>>
>>>>>>>       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
>>>>>>>               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
>>>>>>> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>>>>>>>       if (pci_dev_run_wake(pdev))
>>>>>>>               pm_runtime_put_sync(&pdev->dev);
>>>>>>>
>>>>>>> +     if (rtl_link_chng_polling_quirk(tp))
>>>>>>> +             rtl8169_request_link_timer(dev);
>>>>>>> +
>>>>>>>       return 0;
>>>>>>>  }
>>>>>>>
>>>>>>>
>>>>>>
>>>>>> All this isn't needed. If you want to switch to link status polling,
>>>>>> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL
>>>>>
>>>>> Thanks for suggestions, I tried to use PHY_POLL, it could do the same
>>>>> thing that I did.
>>>>>
>>>>>> Your timer-based code most likely would have problems if runtime pm
>>>>>> is enabled. Then you try to read the link status whilst NIC is in
>>>>>> D3hot.
>>>>
>>
Koba Ko June 7, 2021, 11:09 a.m. UTC | #9
On Mon, Jun 7, 2021 at 6:43 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>
> On 07.06.2021 06:34, Koba Ko wrote:
> > On Fri, Jun 4, 2021 at 7:59 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>
> >> On 04.06.2021 11:08, Koba Ko wrote:
> >>> On Fri, Jun 4, 2021 at 4:23 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>>>
> >>>> On 04.06.2021 09:22, Koba Ko wrote:
> >>>>> On Thu, Jun 3, 2021 at 6:00 PM Heiner Kallweit <hkallweit1@gmail.com> wrote:
> >>>>>>
> >>>>>> On 03.06.2021 04:54, Koba Ko wrote:
> >>>>>>> For RTL8106E, it's a Fast-ethernet chip.
> >>>>>>> If ASPM is enabled, the link chang interrupt wouldn't be triggered
> >>>>>>> immediately and must wait a very long time to get link change interrupt.
> >>>>>>> Even the link change interrupt isn't triggered, the phy link is already
> >>>>>>> established.
> >>>>>>>
> >>>>>> At first please provide a full dmesg log and output of lspci -vv.
> >>>>>> Do you have the firmware for the NIC loaded? Please provide "ethtool -i <if>"
> >>>>>> output.
> >>>>>
> >>>>> please get the logs from here,
> >>>>> https://bugzilla.kernel.org/show_bug.cgi?id=213165
> >>>>>
> >>>>>> Does the issue affect link-down and/or link-up detection?
> >>>>>> Do you have runtime pm enabled? Then, after 10s of link-down NIC goes to
> >>>>>> D3hot and link-up detection triggers a PME.
> >>>>>
> >>>>> Issue affect link-up.
> >>>>> yes, pm runtime is enabled, but rtl8106e always stays D0 even if the
> >>>>> cable isn't present.
> >>>>>
> >>>> Then runtime pm doesn't seem to be set to "auto". Else 10s after link loss
> >>>> the chip runtime-suspends and is set to D3hot.
> >>>
> >>> I will check this.
> >>>
> >>>>
> >>>>>>
> >>>>>>> Introduce a polling method to watch the status of phy link and disable
> >>>>>>> the link change interrupt.
> >>>>>>> Also add a quirk for those realtek devices have the same issue.
> >>>>>>>
> >>>>>> Which are the affected chip versions? Did you check with Realtek?
> >>>>>> Your patch switches to polling for all Fast Ethernet versions,
> >>>>>> and that's not what we want.
> >>>>>
> >>>>> I don't know the exact version, only the chip name 806e(pci device id 0x8165).
> >>>>> ok, Im asking Realtek to help how to identify the chip issue is observed.
> >>>>>
> >>>> At least your Bugzilla report refers to VER_39. PCI device id 0x8136 is shared
> >>>> by all fast ethernet chip versions.
> >>>> Do you know other affected chip versions apart from VER_39 ?
> >>>>
> >>>> In the Bugzilla report you also write the issue occurs with GBit-capable
> >>>> link partners. This sounds more like an aneg problem.
> >>>> The issue doesn't occur with fast ethernet link partners?
> >>>
> >>> Issue wouldn't be observed when the link-partner has only FE capability.
> >>>
> >> Weird. I still have no clue how FE vs. GE support at link partner and
> >> ASPM could be related. I could understand that the PHY might have a
> >> problem with a GE link partner and aneg takes more time than usual.
> >> But this would be completely unrelated to a potential issue with
> >> ASPM on the PCIe link.
> >>
> >> And it's also not clear how L1_1 can cause an issue if the NIC doesn't
> >> support L1 sub-states. Maybe the root cause isn't with the NIC but
> >> with some other component in the PCIe path (e.g. bridge).
> >>
> >
> > I prefer that there's a interrupt issue when aspm is enabled on RTL8106e,
> >
> >>>>
> >>>> Your bug report also includes a patch that disables L1_1 only.
> >>>> Not sure how this is related because the chip version we speak about
> >>>> here doesn't support L1 sub-states.
> >>>
> >>> I have tried to enable L0s, L1 and don't disable L1 substate,
> >>> but still get the issue that interrupt can't be fired immediately but
> >>> the Link status is up.
> >>>
> >>>>
> >>>>>>
> >>>>>> My suspicion would be that something is system-dependent. Else I think
> >>>>>> we would have seen such a report before.
> >>>>> On the mainline, the aspm is disable, so you may not observe this.
> >>>>> If you enable ASPM and must wait CHIP go to power-saving mode, then
> >>>>> you can observe the issue.
> >>>>>>
> >>>>
> >>>> So what you're saying is that mainline is fine and your problem is with
> >>>> a downstream kernel with re-enabled ASPM? So there's nothing broken in
> >>>> mainline? In mainline you have the option to re-enable ASPM states
> >>>> individually via sysfs (link subdir at pci device).
> >>>
> >>> If enable L1_1 on the mainline, the issue could be observed too.
> >>>
> >> It has a reason that ASPM is disabled per default in mainline. Different
> >> chip versions have different types of issues with ASPM enabled.
> >> However several chip versions work fine with ASPM (also LI sub-states),
> >> therefore users can re-enable ASPM states at own risk.
> >
> > After consulting with REALTEK, I can identify RTL8106e by PCI_VENDOR
> > REALTEK, DEVICE 0x8136, Revision 0x7.
>
> This wasn't the question. The identification is available already.
> This chip version is RTL_GIGA_MAC_VER_39.
>
> > I would like to make PHY_POLL as default for RTL8106E on V2.
> > because there's no side effects besides the cpu usage rate would be a
> > little higher,
> > How do you think?
> >
> Did you check the actual question with Realtek? Did they confirm a hw
> issue with this chip version? If yes, some more details would be helpful.
>

yes, but Realtek said there's no issue on ASPM and AN.
They also didn't answer why disabling an unsupported L1_1 would cause
this issue.

>
> The dmesg log attached to your linked bugzilla issue lists a number of
> ACPI errors. Overall I'm still not convinced that root cause of your
> issue is a NIC hw bug.
>

I also upgraded the BIOS to the last and still can get the issue.

> >>> Thanks
> >>>>
> >>>>>>> Signed-off-by: Koba Ko <koba.ko@canonical.com>
> >>>>>>> ---
> >>>>>>>  drivers/net/ethernet/realtek/r8169.h      |   2 +
> >>>>>>>  drivers/net/ethernet/realtek/r8169_main.c | 112 ++++++++++++++++++----
> >>>>>>>  2 files changed, 98 insertions(+), 16 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
> >>>>>>> index 2728df46ec41..a8c71adb1b57 100644
> >>>>>>> --- a/drivers/net/ethernet/realtek/r8169.h
> >>>>>>> +++ b/drivers/net/ethernet/realtek/r8169.h
> >>>>>>> @@ -11,6 +11,8 @@
> >>>>>>>  #include <linux/types.h>
> >>>>>>>  #include <linux/phy.h>
> >>>>>>>
> >>>>>>> +#define RTL8169_LINK_TIMEOUT (1 * HZ)
> >>>>>>> +
> >>>>>>>  enum mac_version {
> >>>>>>>       /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
> >>>>>>>       RTL_GIGA_MAC_VER_02,
> >>>>>>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> >>>>>>> index 2c89cde7da1e..70aacc83d641 100644
> >>>>>>> --- a/drivers/net/ethernet/realtek/r8169_main.c
> >>>>>>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> >>>>>>> @@ -178,6 +178,11 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
> >>>>>>>
> >>>>>>>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
> >>>>>>>
> >>>>>>> +static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
> >>>>>>> +     { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
> >>>>>>> +     { 0 }
> >>>>>>> +};
> >>>>>>> +
> >>>>>>
> >>>>>> This doesn't seem to be used.
> >>>>>>
> >>>>>>>  enum rtl_registers {
> >>>>>>>       MAC0            = 0,    /* Ethernet hardware address. */
> >>>>>>>       MAC4            = 4,
> >>>>>>> @@ -618,6 +623,7 @@ struct rtl8169_private {
> >>>>>>>       u16 cp_cmd;
> >>>>>>>       u32 irq_mask;
> >>>>>>>       struct clk *clk;
> >>>>>>> +     struct timer_list link_timer;
> >>>>>>>
> >>>>>>>       struct {
> >>>>>>>               DECLARE_BITMAP(flags, RTL_FLAG_MAX);
> >>>>>>> @@ -1179,6 +1185,16 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
> >>>>>>>       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
> >>>>>>> +{
> >>>>>>> +     struct pci_dev *pdev = tp->pci_dev;
> >>>>>>> +
> >>>>>>> +     if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
> >>>>>>> +             return 1;
> >>>>>>> +
> >>>>>>> +     return 0;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  static void rtl8168dp_driver_start(struct rtl8169_private *tp)
> >>>>>>>  {
> >>>>>>>       r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
> >>>>>>> @@ -4608,6 +4624,75 @@ static void rtl_task(struct work_struct *work)
> >>>>>>>       rtnl_unlock();
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +static void r8169_phylink_handler(struct net_device *ndev)
> >>>>>>> +{
> >>>>>>> +     struct rtl8169_private *tp = netdev_priv(ndev);
> >>>>>>> +
> >>>>>>> +     if (netif_carrier_ok(ndev)) {
> >>>>>>> +             rtl_link_chg_patch(tp);
> >>>>>>> +             pm_request_resume(&tp->pci_dev->dev);
> >>>>>>> +     } else {
> >>>>>>> +             pm_runtime_idle(&tp->pci_dev->dev);
> >>>>>>> +     }
> >>>>>>> +
> >>>>>>> +     if (net_ratelimit())
> >>>>>>> +             phy_print_status(tp->phydev);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static unsigned int
> >>>>>>> +rtl8169_xmii_link_ok(struct net_device *dev)
> >>>>>>> +{
> >>>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>>>>>> +     unsigned int retval;
> >>>>>>> +
> >>>>>>> +     retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
> >>>>>>> +
> >>>>>>> +     return retval;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static void
> >>>>>>> +rtl8169_check_link_status(struct net_device *dev)
> >>>>>>> +{
> >>>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>>>>>> +     int link_status_on;
> >>>>>>> +
> >>>>>>> +     link_status_on = rtl8169_xmii_link_ok(dev);
> >>>>>>> +
> >>>>>>> +     if (netif_carrier_ok(dev) == link_status_on)
> >>>>>>> +             return;
> >>>>>>> +
> >>>>>>> +     phy_mac_interrupt(tp->phydev);
> >>>>>>> +
> >>>>>>> +     r8169_phylink_handler (dev);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static void rtl8169_link_timer(struct timer_list *t)
> >>>>>>> +{
> >>>>>>> +     struct rtl8169_private *tp = from_timer(tp, t, link_timer);
> >>>>>>> +     struct net_device *dev = tp->dev;
> >>>>>>> +     struct timer_list *timer = t;
> >>>>>>> +     unsigned long flags;
> >>>>>>
> >>>>>> flags isn't used and triggers a compiler warning. Did you even
> >>>>>> compile-test your patch?
> >>>>>>
> >>>>>>> +
> >>>>>>> +     rtl8169_check_link_status(dev);
> >>>>>>> +
> >>>>>>> +     if (timer_pending(&tp->link_timer))
> >>>>>>> +             return;
> >>>>>>> +
> >>>>>>> +     mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
> >>>>>>> +{
> >>>>>>> +     del_timer_sync(timer);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static inline void rtl8169_request_link_timer(struct net_device *dev)
> >>>>>>> +{
> >>>>>>> +     struct rtl8169_private *tp = netdev_priv(dev);
> >>>>>>> +
> >>>>>>> +     timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  static int rtl8169_poll(struct napi_struct *napi, int budget)
> >>>>>>>  {
> >>>>>>>       struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
> >>>>>>> @@ -4624,21 +4709,6 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
> >>>>>>>       return work_done;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> -static void r8169_phylink_handler(struct net_device *ndev)
> >>>>>>> -{
> >>>>>>> -     struct rtl8169_private *tp = netdev_priv(ndev);
> >>>>>>> -
> >>>>>>> -     if (netif_carrier_ok(ndev)) {
> >>>>>>> -             rtl_link_chg_patch(tp);
> >>>>>>> -             pm_request_resume(&tp->pci_dev->dev);
> >>>>>>> -     } else {
> >>>>>>> -             pm_runtime_idle(&tp->pci_dev->dev);
> >>>>>>> -     }
> >>>>>>> -
> >>>>>>> -     if (net_ratelimit())
> >>>>>>> -             phy_print_status(tp->phydev);
> >>>>>>> -}
> >>>>>>> -
> >>>>>>>  static int r8169_phy_connect(struct rtl8169_private *tp)
> >>>>>>>  {
> >>>>>>>       struct phy_device *phydev = tp->phydev;
> >>>>>>> @@ -4769,6 +4839,10 @@ static int rtl_open(struct net_device *dev)
> >>>>>>>               goto err_free_irq;
> >>>>>>>
> >>>>>>>       rtl8169_up(tp);
> >>>>>>> +
> >>>>>>> +     if (rtl_link_chng_polling_quirk(tp))
> >>>>>>> +             mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
> >>>>>>> +
> >>>>>>>       rtl8169_init_counter_offsets(tp);
> >>>>>>>       netif_start_queue(dev);
> >>>>>>>  out:
> >>>>>>> @@ -4991,7 +5065,10 @@ static const struct net_device_ops rtl_netdev_ops = {
> >>>>>>>
> >>>>>>>  static void rtl_set_irq_mask(struct rtl8169_private *tp)
> >>>>>>>  {
> >>>>>>> -     tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
> >>>>>>> +     tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
> >>>>>>> +
> >>>>>>> +     if (!rtl_link_chng_polling_quirk(tp))
> >>>>>>> +             tp->irq_mask |= LinkChg;
> >>>>>>>
> >>>>>>>       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
> >>>>>>>               tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
> >>>>>>> @@ -5436,6 +5513,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> >>>>>>>       if (pci_dev_run_wake(pdev))
> >>>>>>>               pm_runtime_put_sync(&pdev->dev);
> >>>>>>>
> >>>>>>> +     if (rtl_link_chng_polling_quirk(tp))
> >>>>>>> +             rtl8169_request_link_timer(dev);
> >>>>>>> +
> >>>>>>>       return 0;
> >>>>>>>  }
> >>>>>>>
> >>>>>>>
> >>>>>>
> >>>>>> All this isn't needed. If you want to switch to link status polling,
> >>>>>> why don't you simply let phylib do it? PHY_MAC_INTERRUPT -> PHY_POLL
> >>>>>
> >>>>> Thanks for suggestions, I tried to use PHY_POLL, it could do the same
> >>>>> thing that I did.
> >>>>>
> >>>>>> Your timer-based code most likely would have problems if runtime pm
> >>>>>> is enabled. Then you try to read the link status whilst NIC is in
> >>>>>> D3hot.
> >>>>
> >>
>
David Laight June 7, 2021, 12:32 p.m. UTC | #10
From: Koba Ko
> Sent: 07 June 2021 05:35
...
> After consulting with REALTEK, I can identify RTL8106e by PCI_VENDOR
> REALTEK, DEVICE 0x8136, Revision 0x7.
> I would like to make PHY_POLL as default for RTL8106E on V2.
> because there's no side effects besides the cpu usage rate would be a
> little higher,
> How do you think?

If reading the PHY registers involves a software bit-bang
of an MII register (rather than, say, a sleep for interrupt
while the MAC unit does the bit-bang) then you can clobber
interrupt latency because of all the time spent spinning.

While this is less of a problem on multi-cpu systems I have
seen it result in ethernet packet loss on old systems.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Andrew Lunn June 7, 2021, 12:49 p.m. UTC | #11
On Mon, Jun 07, 2021 at 12:32:29PM +0000, David Laight wrote:
> From: Koba Ko
> > Sent: 07 June 2021 05:35
> ...
> > After consulting with REALTEK, I can identify RTL8106e by PCI_VENDOR
> > REALTEK, DEVICE 0x8136, Revision 0x7.
> > I would like to make PHY_POLL as default for RTL8106E on V2.
> > because there's no side effects besides the cpu usage rate would be a
> > little higher,
> > How do you think?
> 
> If reading the PHY registers involves a software bit-bang
> of an MII register (rather than, say, a sleep for interrupt
> while the MAC unit does the bit-bang) then you can clobber
> interrupt latency because of all the time spent spinning.

That is not what PHY IRQ/POLL means in the PHY subsystem.

Many PHYs don't actually have there interrupt output connected to a
GPIO. This is partially because 803.2 C22 and C45 standards don't
define interrupts. Each vendor which supports interrupts uses
proprietary registers. So by default, the PHY subsystem will poll the
status of the PHY once per second to see if the link has changed
state. If the combination of PHY hardware, board hardware and PHY
driver does have interrupts, the PHY subsystem will not poll, but wait
for an interrupt, and then check the status of the link.

As for MII bus masters, i only know of one which is interrupt driven,
rather than polled IO, for completion. The hardware is clocking out 64
bits at 2.5MHz. So it is done rather quickly. I profiled that one
using interrupts, and the overhead of dealing with the interrupt is
bigger than polling.

    Andrew
David Laight June 7, 2021, 1:17 p.m. UTC | #12
From: Andrew Lunn
> Sent: 07 June 2021 13:50
> 
> On Mon, Jun 07, 2021 at 12:32:29PM +0000, David Laight wrote:
> > From: Koba Ko
> > > Sent: 07 June 2021 05:35
> > ...
> > > After consulting with REALTEK, I can identify RTL8106e by PCI_VENDOR
> > > REALTEK, DEVICE 0x8136, Revision 0x7.
> > > I would like to make PHY_POLL as default for RTL8106E on V2.
> > > because there's no side effects besides the cpu usage rate would be a
> > > little higher,
> > > How do you think?
> >
> > If reading the PHY registers involves a software bit-bang
> > of an MII register (rather than, say, a sleep for interrupt
> > while the MAC unit does the bit-bang) then you can clobber
> > interrupt latency because of all the time spent spinning.
> 
> That is not what PHY IRQ/POLL means in the PHY subsystem.
> 
> Many PHYs don't actually have there interrupt output connected to a
> GPIO. This is partially because 803.2 C22 and C45 standards don't
> define interrupts. Each vendor which supports interrupts uses
> proprietary registers. So by default, the PHY subsystem will poll the
> status of the PHY once per second to see if the link has changed
> state. If the combination of PHY hardware, board hardware and PHY
> driver does have interrupts, the PHY subsystem will not poll, but wait
> for an interrupt, and then check the status of the link.

I know. I might be 30 years since I wrote anything to read MII
but I don't remember seeing anything that made it less horrid.

One of the MAC units (probably AMD lance based) could be configured
to repeatedly read one PHY register and generate a MAC interrupt
if it changed - but I've not seen that on some later MAC chips.

> As for MII bus masters, i only know of one which is interrupt driven,
> rather than polled IO, for completion. The hardware is clocking out 64
> bits at 2.5MHz. So it is done rather quickly. I profiled that one
> using interrupts, and the overhead of dealing with the interrupt is
> bigger than polling.

64 bits at 2.5MHz is some 64000 cpu clocks - not inconsiderable.

It has to be said that I don't know how to solve the delays
associated with software bit-bang (apart from persuading the
hardware engineers it isn't a good idea).
With my 'hardware engineer' hat on (I'm currently (ir)responsible
for some FPGA internals as well as the drivers) the logic to
do things like I2C (etc) reads and writes from fpga memory
sits in a tiny corner of a modern device.

One possibility for 'slow polls' is to do them slowly!
One edge per timer tick - although 'tickless' probably
kills that.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index 2728df46ec41..a8c71adb1b57 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -11,6 +11,8 @@ 
 #include <linux/types.h>
 #include <linux/phy.h>
 
+#define RTL8169_LINK_TIMEOUT (1 * HZ)
+
 enum mac_version {
 	/* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
 	RTL_GIGA_MAC_VER_02,
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 2c89cde7da1e..70aacc83d641 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -178,6 +178,11 @@  static const struct pci_device_id rtl8169_pci_tbl[] = {
 
 MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
 
+static const struct pci_device_id rtl8169_linkChg_polling_enabled[] = {
+	{ PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT },
+	{ 0 }
+};
+
 enum rtl_registers {
 	MAC0		= 0,	/* Ethernet hardware address. */
 	MAC4		= 4,
@@ -618,6 +623,7 @@  struct rtl8169_private {
 	u16 cp_cmd;
 	u32 irq_mask;
 	struct clk *clk;
+	struct timer_list link_timer;
 
 	struct {
 		DECLARE_BITMAP(flags, RTL_FLAG_MAX);
@@ -1179,6 +1185,16 @@  static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
 	RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
 }
 
+static int rtl_link_chng_polling_quirk(struct rtl8169_private *tp)
+{
+	struct pci_dev *pdev = tp->pci_dev;
+
+	if (pdev->vendor == 0x10ec && pdev->device == 0x8136 && !tp->supports_gmii)
+		return 1;
+
+	return 0;
+}
+
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
 {
 	r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
@@ -4608,6 +4624,75 @@  static void rtl_task(struct work_struct *work)
 	rtnl_unlock();
 }
 
+static void r8169_phylink_handler(struct net_device *ndev)
+{
+	struct rtl8169_private *tp = netdev_priv(ndev);
+
+	if (netif_carrier_ok(ndev)) {
+		rtl_link_chg_patch(tp);
+		pm_request_resume(&tp->pci_dev->dev);
+	} else {
+		pm_runtime_idle(&tp->pci_dev->dev);
+	}
+
+	if (net_ratelimit())
+		phy_print_status(tp->phydev);
+}
+
+static unsigned int
+rtl8169_xmii_link_ok(struct net_device *dev)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	unsigned int retval;
+
+	retval = (RTL_R8(tp, PHYstatus) & LinkStatus) ? 1 : 0;
+
+	return retval;
+}
+
+static void
+rtl8169_check_link_status(struct net_device *dev)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	int link_status_on;
+
+	link_status_on = rtl8169_xmii_link_ok(dev);
+
+	if (netif_carrier_ok(dev) == link_status_on)
+		return;
+
+	phy_mac_interrupt(tp->phydev);
+
+	r8169_phylink_handler (dev);
+}
+
+static void rtl8169_link_timer(struct timer_list *t)
+{
+	struct rtl8169_private *tp = from_timer(tp, t, link_timer);
+	struct net_device *dev = tp->dev;
+	struct timer_list *timer = t;
+	unsigned long flags;
+
+	rtl8169_check_link_status(dev);
+
+	if (timer_pending(&tp->link_timer))
+		return;
+
+	mod_timer(timer, jiffies + RTL8169_LINK_TIMEOUT);
+}
+
+static inline void rtl8169_delete_link_timer(struct net_device *dev, struct timer_list *timer)
+{
+	del_timer_sync(timer);
+}
+
+static inline void rtl8169_request_link_timer(struct net_device *dev)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	timer_setup(&tp->link_timer, rtl8169_link_timer, TIMER_INIT_FLAGS);
+}
+
 static int rtl8169_poll(struct napi_struct *napi, int budget)
 {
 	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
@@ -4624,21 +4709,6 @@  static int rtl8169_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static void r8169_phylink_handler(struct net_device *ndev)
-{
-	struct rtl8169_private *tp = netdev_priv(ndev);
-
-	if (netif_carrier_ok(ndev)) {
-		rtl_link_chg_patch(tp);
-		pm_request_resume(&tp->pci_dev->dev);
-	} else {
-		pm_runtime_idle(&tp->pci_dev->dev);
-	}
-
-	if (net_ratelimit())
-		phy_print_status(tp->phydev);
-}
-
 static int r8169_phy_connect(struct rtl8169_private *tp)
 {
 	struct phy_device *phydev = tp->phydev;
@@ -4769,6 +4839,10 @@  static int rtl_open(struct net_device *dev)
 		goto err_free_irq;
 
 	rtl8169_up(tp);
+
+	if (rtl_link_chng_polling_quirk(tp))
+		mod_timer(&tp->link_timer, jiffies + RTL8169_LINK_TIMEOUT);
+
 	rtl8169_init_counter_offsets(tp);
 	netif_start_queue(dev);
 out:
@@ -4991,7 +5065,10 @@  static const struct net_device_ops rtl_netdev_ops = {
 
 static void rtl_set_irq_mask(struct rtl8169_private *tp)
 {
-	tp->irq_mask = RxOK | RxErr | TxOK | TxErr | LinkChg;
+	tp->irq_mask = RxOK | RxErr | TxOK | TxErr;
+
+	if (!rtl_link_chng_polling_quirk(tp))
+		tp->irq_mask |= LinkChg;
 
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
 		tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver;
@@ -5436,6 +5513,9 @@  static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_put_sync(&pdev->dev);
 
+	if (rtl_link_chng_polling_quirk(tp))
+		rtl8169_request_link_timer(dev);
+
 	return 0;
 }