diff mbox series

[net] net: phy: Don't trigger state machine while in suspend

Message ID 688f559346ea747d3b47a4d16ef8277e093f9ebe.1653556322.git.lukas@wunner.de (mailing list archive)
State Accepted
Commit 1758bde2e4aa5ff188d53e7d9d388bbb7e12eebb
Headers show
Series [net] net: phy: Don't trigger state machine while in suspend | expand

Commit Message

Lukas Wunner May 26, 2022, 9:28 a.m. UTC
Upon system sleep, mdio_bus_phy_suspend() stops the phy_state_machine(),
but subsequent interrupts may retrigger it:

They may have been left enabled to facilitate wakeup and are not
quiesced until the ->suspend_noirq() phase.  Unwanted interrupts may
hence occur between mdio_bus_phy_suspend() and dpm_suspend_noirq(),
as well as between dpm_resume_noirq() and mdio_bus_phy_resume().

Amend phy_interrupt() to avoid triggering the state machine if the PHY
is suspended.  Signal wakeup instead if the attached net_device or its
parent has been configured as a wakeup source.  (Those conditions are
identical to mdio_bus_phy_may_suspend().)  Postpone handling of the
interrupt until the PHY has resumed.

Before stopping the phy_state_machine() in mdio_bus_phy_suspend(),
wait for a concurrent phy_interrupt() to run to completion.  That is
necessary because phy_interrupt() may have checked the PHY's suspend
status before the system sleep transition commenced and it may thus
retrigger the state machine after it was stopped.

Likewise, after re-enabling interrupt handling in mdio_bus_phy_resume(),
wait for a concurrent phy_interrupt() to complete to ensure that
interrupts which it postponed are properly rerun.

Fixes: 1ce8b37241ed ("usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid polling")
Link: https://lore.kernel.org/netdev/a5315a8a-32c2-962f-f696-de9a26d30091@samsung.com/
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
---
 drivers/net/phy/phy.c        | 23 +++++++++++++++++++++++
 drivers/net/phy/phy_device.c | 23 +++++++++++++++++++++++
 include/linux/phy.h          |  6 ++++++
 3 files changed, 52 insertions(+)

Comments

Marek Szyprowski May 26, 2022, 10:34 a.m. UTC | #1
Hi Lukas,

On 26.05.2022 11:28, Lukas Wunner wrote:
> Upon system sleep, mdio_bus_phy_suspend() stops the phy_state_machine(),
> but subsequent interrupts may retrigger it:
>
> They may have been left enabled to facilitate wakeup and are not
> quiesced until the ->suspend_noirq() phase.  Unwanted interrupts may
> hence occur between mdio_bus_phy_suspend() and dpm_suspend_noirq(),
> as well as between dpm_resume_noirq() and mdio_bus_phy_resume().
>
> Amend phy_interrupt() to avoid triggering the state machine if the PHY
> is suspended.  Signal wakeup instead if the attached net_device or its
> parent has been configured as a wakeup source.  (Those conditions are
> identical to mdio_bus_phy_may_suspend().)  Postpone handling of the
> interrupt until the PHY has resumed.
>
> Before stopping the phy_state_machine() in mdio_bus_phy_suspend(),
> wait for a concurrent phy_interrupt() to run to completion.  That is
> necessary because phy_interrupt() may have checked the PHY's suspend
> status before the system sleep transition commenced and it may thus
> retrigger the state machine after it was stopped.
>
> Likewise, after re-enabling interrupt handling in mdio_bus_phy_resume(),
> wait for a concurrent phy_interrupt() to complete to ensure that
> interrupts which it postponed are properly rerun.
>
> Fixes: 1ce8b37241ed ("usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid polling")

I'm not sure if this is a right commit here. It revealed the issue, but 
it is not directly related to the net/phy code.

> Link: https://lore.kernel.org/netdev/a5315a8a-32c2-962f-f696-de9a26d30091@samsung.com/
> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Signed-off-by: Lukas Wunner <lukas@wunner.de>

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>

> ---
>   drivers/net/phy/phy.c        | 23 +++++++++++++++++++++++
>   drivers/net/phy/phy_device.c | 23 +++++++++++++++++++++++
>   include/linux/phy.h          |  6 ++++++
>   3 files changed, 52 insertions(+)
>
> diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
> index ef62f357b76d..8d3ee3a6495b 100644
> --- a/drivers/net/phy/phy.c
> +++ b/drivers/net/phy/phy.c
> @@ -31,6 +31,7 @@
>   #include <linux/io.h>
>   #include <linux/uaccess.h>
>   #include <linux/atomic.h>
> +#include <linux/suspend.h>
>   #include <net/netlink.h>
>   #include <net/genetlink.h>
>   #include <net/sock.h>
> @@ -976,6 +977,28 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
>   	struct phy_driver *drv = phydev->drv;
>   	irqreturn_t ret;
>   
> +	/* Wakeup interrupts may occur during a system sleep transition.
> +	 * Postpone handling until the PHY has resumed.
> +	 */
> +	if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) {
> +		struct net_device *netdev = phydev->attached_dev;
> +
> +		if (netdev) {
> +			struct device *parent = netdev->dev.parent;
> +
> +			if (netdev->wol_enabled)
> +				pm_system_wakeup();
> +			else if (device_may_wakeup(&netdev->dev))
> +				pm_wakeup_dev_event(&netdev->dev, 0, true);
> +			else if (parent && device_may_wakeup(parent))
> +				pm_wakeup_dev_event(parent, 0, true);
> +		}
> +
> +		phydev->irq_rerun = 1;
> +		disable_irq_nosync(irq);
> +		return IRQ_HANDLED;
> +	}
> +
>   	mutex_lock(&phydev->lock);
>   	ret = drv->handle_interrupt(phydev);
>   	mutex_unlock(&phydev->lock);
> diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
> index 431a8719c635..46acddd865a7 100644
> --- a/drivers/net/phy/phy_device.c
> +++ b/drivers/net/phy/phy_device.c
> @@ -278,6 +278,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
>   	if (phydev->mac_managed_pm)
>   		return 0;
>   
> +	/* Wakeup interrupts may occur during the system sleep transition when
> +	 * the PHY is inaccessible. Set flag to postpone handling until the PHY
> +	 * has resumed. Wait for concurrent interrupt handler to complete.
> +	 */
> +	if (phy_interrupt_is_valid(phydev)) {
> +		phydev->irq_suspended = 1;
> +		synchronize_irq(phydev->irq);
> +	}
> +
>   	/* We must stop the state machine manually, otherwise it stops out of
>   	 * control, possibly with the phydev->lock held. Upon resume, netdev
>   	 * may call phy routines that try to grab the same lock, and that may
> @@ -315,6 +324,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
>   	if (ret < 0)
>   		return ret;
>   no_resume:
> +	if (phy_interrupt_is_valid(phydev)) {
> +		phydev->irq_suspended = 0;
> +		synchronize_irq(phydev->irq);
> +
> +		/* Rerun interrupts which were postponed by phy_interrupt()
> +		 * because they occurred during the system sleep transition.
> +		 */
> +		if (phydev->irq_rerun) {
> +			phydev->irq_rerun = 0;
> +			enable_irq(phydev->irq);
> +			irq_wake_thread(phydev->irq, phydev);
> +		}
> +	}
> +
>   	if (phydev->attached_dev && phydev->adjust_link)
>   		phy_start_machine(phydev);
>   
> diff --git a/include/linux/phy.h b/include/linux/phy.h
> index 508f1149665b..b09f7d36cff2 100644
> --- a/include/linux/phy.h
> +++ b/include/linux/phy.h
> @@ -572,6 +572,10 @@ struct macsec_ops;
>    * @mdix_ctrl: User setting of crossover
>    * @pma_extable: Cached value of PMA/PMD Extended Abilities Register
>    * @interrupts: Flag interrupts have been enabled
> + * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt
> + *                 handling shall be postponed until PHY has resumed
> + * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended,
> + *             requiring a rerun of the interrupt handler after resume
>    * @interface: enum phy_interface_t value
>    * @skb: Netlink message for cable diagnostics
>    * @nest: Netlink nest used for cable diagnostics
> @@ -626,6 +630,8 @@ struct phy_device {
>   
>   	/* Interrupts are enabled */
>   	unsigned interrupts:1;
> +	unsigned irq_suspended:1;
> +	unsigned irq_rerun:1;
>   
>   	enum phy_state state;
>   

Best regards
Andrew Lunn June 6, 2022, 1:40 a.m. UTC | #2
> +	if (phy_interrupt_is_valid(phydev)) {
> +		phydev->irq_suspended = 0;
> +		synchronize_irq(phydev->irq);
> +
> +		/* Rerun interrupts which were postponed by phy_interrupt()
> +		 * because they occurred during the system sleep transition.
> +		 */
> +		if (phydev->irq_rerun) {
> +			phydev->irq_rerun = 0;
> +			enable_irq(phydev->irq);
> +			irq_wake_thread(phydev->irq, phydev);
> +		}
> +	}

As i said in a previous thread, PHY interrupts are generally level,
not edge. So when you call enable_irq(phydev->irq), doesn't it
immediately fire? You need to first call the handler, and then
re-enable the interrupt.

      Andrew
Lukas Wunner June 6, 2022, 5:53 a.m. UTC | #3
On Mon, Jun 06, 2022 at 03:40:49AM +0200, Andrew Lunn wrote:
> > +	if (phy_interrupt_is_valid(phydev)) {
> > +		phydev->irq_suspended = 0;
> > +		synchronize_irq(phydev->irq);
> > +
> > +		/* Rerun interrupts which were postponed by phy_interrupt()
> > +		 * because they occurred during the system sleep transition.
> > +		 */
> > +		if (phydev->irq_rerun) {
> > +			phydev->irq_rerun = 0;
> > +			enable_irq(phydev->irq);
> > +			irq_wake_thread(phydev->irq, phydev);
> > +		}
> > +	}
> 
> As i said in a previous thread, PHY interrupts are generally level,
> not edge. So when you call enable_irq(phydev->irq), doesn't it
> immediately fire?

Yes, if the interrupt is indeed level and the PHY is capable of
remembering that an interrupt occurred while the system was suspended
or was about to be suspended.

The irq_wake_thread() ensures that the IRQ handler is called,
should one of those conditions *not* be met.

Recall that phylib uses irq_default_primary_handler() as hardirq
handler.  That handler does nothing else but wake the IRQ thread,
which runs phy->handle_interrupt() in task context.

The irq_wake_thread() above likewise wakes the IRQ thread,
i.e. it tells the scheduler to put it on the run queue.

If, as you say, the interrupt is level and fires upon enable_irq(),
the result is that the scheduler is told twice to put the IRQ thread
on the run queue.  Usually this will happen faster than the IRQ thread
actually gets scheduled, so it will only run once.

In the unlikely event that the IRQ thread gets scheduled before the
call to irq_wake_thread(), the IRQ thread will run twice.
However, that's harmless.  IRQ handlers can cope with that.


> You need to first call the handler, and then re-enable the interrupt.

I guess I could call phy_interrupt() before the enable_irq(),
in lieu of irq_wake_thread().

However, it would mean that I'd invoke the IRQ handler behind the back
of the generic irq code.  That doesn't feel quite right.
Calling irq_wake_thread() is the correct way if one wants to be
compliant with the generic irq code's expectations.

If you feel strongly about it I can make that change but I would
advise against it.  Let me know what you think.

Thanks,

Lukas
Andrew Lunn June 6, 2022, 12:19 p.m. UTC | #4
On Mon, Jun 06, 2022 at 07:53:20AM +0200, Lukas Wunner wrote:
> On Mon, Jun 06, 2022 at 03:40:49AM +0200, Andrew Lunn wrote:
> > > +	if (phy_interrupt_is_valid(phydev)) {
> > > +		phydev->irq_suspended = 0;
> > > +		synchronize_irq(phydev->irq);
> > > +
> > > +		/* Rerun interrupts which were postponed by phy_interrupt()
> > > +		 * because they occurred during the system sleep transition.
> > > +		 */
> > > +		if (phydev->irq_rerun) {
> > > +			phydev->irq_rerun = 0;
> > > +			enable_irq(phydev->irq);
> > > +			irq_wake_thread(phydev->irq, phydev);
> > > +		}
> > > +	}
> > 
> > As i said in a previous thread, PHY interrupts are generally level,
> > not edge. So when you call enable_irq(phydev->irq), doesn't it
> > immediately fire?
> 
> Yes, if the interrupt is indeed level and the PHY is capable of
> remembering that an interrupt occurred while the system was suspended
> or was about to be suspended.

It should remember, in the WoL case. It keeps it power etc.

> The irq_wake_thread() ensures that the IRQ handler is called,
> should one of those conditions *not* be met.
> 
> Recall that phylib uses irq_default_primary_handler() as hardirq
> handler.  That handler does nothing else but wake the IRQ thread,
> which runs phy->handle_interrupt() in task context.
> 
> The irq_wake_thread() above likewise wakes the IRQ thread,
> i.e. it tells the scheduler to put it on the run queue.
> 
> If, as you say, the interrupt is level and fires upon enable_irq(),
> the result is that the scheduler is told twice to put the IRQ thread
> on the run queue.  Usually this will happen faster than the IRQ thread
> actually gets scheduled, so it will only run once.
> 
> In the unlikely event that the IRQ thread gets scheduled before the
> call to irq_wake_thread(), the IRQ thread will run twice.
> However, that's harmless.  IRQ handlers can cope with that.

I'm just slightly worried about the IRQ handler returning there was
nothing to do. The IRQ core counts such interrupts, and will disable
the interrupt if nobody says it is actually handling the
interrupts. But it needs to be a few interrupts before this kicks in,
so it should be safe.

One other thought is we should probably get the IRQ Maintainers to
look this patch over. Please could you repost and Cc: them.

Thanks

   Andrew
diff mbox series

Patch

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index ef62f357b76d..8d3ee3a6495b 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -31,6 +31,7 @@ 
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/atomic.h>
+#include <linux/suspend.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
 #include <net/sock.h>
@@ -976,6 +977,28 @@  static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 	struct phy_driver *drv = phydev->drv;
 	irqreturn_t ret;
 
+	/* Wakeup interrupts may occur during a system sleep transition.
+	 * Postpone handling until the PHY has resumed.
+	 */
+	if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) {
+		struct net_device *netdev = phydev->attached_dev;
+
+		if (netdev) {
+			struct device *parent = netdev->dev.parent;
+
+			if (netdev->wol_enabled)
+				pm_system_wakeup();
+			else if (device_may_wakeup(&netdev->dev))
+				pm_wakeup_dev_event(&netdev->dev, 0, true);
+			else if (parent && device_may_wakeup(parent))
+				pm_wakeup_dev_event(parent, 0, true);
+		}
+
+		phydev->irq_rerun = 1;
+		disable_irq_nosync(irq);
+		return IRQ_HANDLED;
+	}
+
 	mutex_lock(&phydev->lock);
 	ret = drv->handle_interrupt(phydev);
 	mutex_unlock(&phydev->lock);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 431a8719c635..46acddd865a7 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -278,6 +278,15 @@  static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
 	if (phydev->mac_managed_pm)
 		return 0;
 
+	/* Wakeup interrupts may occur during the system sleep transition when
+	 * the PHY is inaccessible. Set flag to postpone handling until the PHY
+	 * has resumed. Wait for concurrent interrupt handler to complete.
+	 */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->irq_suspended = 1;
+		synchronize_irq(phydev->irq);
+	}
+
 	/* We must stop the state machine manually, otherwise it stops out of
 	 * control, possibly with the phydev->lock held. Upon resume, netdev
 	 * may call phy routines that try to grab the same lock, and that may
@@ -315,6 +324,20 @@  static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
 	if (ret < 0)
 		return ret;
 no_resume:
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->irq_suspended = 0;
+		synchronize_irq(phydev->irq);
+
+		/* Rerun interrupts which were postponed by phy_interrupt()
+		 * because they occurred during the system sleep transition.
+		 */
+		if (phydev->irq_rerun) {
+			phydev->irq_rerun = 0;
+			enable_irq(phydev->irq);
+			irq_wake_thread(phydev->irq, phydev);
+		}
+	}
+
 	if (phydev->attached_dev && phydev->adjust_link)
 		phy_start_machine(phydev);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 508f1149665b..b09f7d36cff2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -572,6 +572,10 @@  struct macsec_ops;
  * @mdix_ctrl: User setting of crossover
  * @pma_extable: Cached value of PMA/PMD Extended Abilities Register
  * @interrupts: Flag interrupts have been enabled
+ * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt
+ *                 handling shall be postponed until PHY has resumed
+ * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended,
+ *             requiring a rerun of the interrupt handler after resume
  * @interface: enum phy_interface_t value
  * @skb: Netlink message for cable diagnostics
  * @nest: Netlink nest used for cable diagnostics
@@ -626,6 +630,8 @@  struct phy_device {
 
 	/* Interrupts are enabled */
 	unsigned interrupts:1;
+	unsigned irq_suspended:1;
+	unsigned irq_rerun:1;
 
 	enum phy_state state;