diff mbox

[RFC,v2,1/2] net: ethernet: nb8800: Reset HW block in ndo_open

Message ID d817da35-f977-5985-4aae-73617004fac3@free.fr (mailing list archive)
State New, archived
Headers show

Commit Message

Mason Aug. 1, 2017, 4:37 p.m. UTC
Move all HW initializations to nb8800_init.
This provides the basis for suspend/resume support.
---
 drivers/net/ethernet/aurora/nb8800.c | 50 +++++++++++++++++-------------------
 drivers/net/ethernet/aurora/nb8800.h |  1 +
 2 files changed, 25 insertions(+), 26 deletions(-)

Comments

Mans Rullgard Aug. 2, 2017, 11:02 a.m. UTC | #1
Mason <slash.tmp@free.fr> writes:

> Move all HW initializations to nb8800_init.
> This provides the basis for suspend/resume support.
> ---
>  drivers/net/ethernet/aurora/nb8800.c | 50 +++++++++++++++++-------------------
>  drivers/net/ethernet/aurora/nb8800.h |  1 +
>  2 files changed, 25 insertions(+), 26 deletions(-)

You're still not doing anything to preserve flow control and multicast
configuration, and those are probably not the only ones.

Worse, you're now never tearing down dma properly, which means the
hardware can be left with active pointers to memory no longer allocated
to it.

Finally, you still haven't explained why the hw needs to be reset in
ndo_open().  Whatever is causing your lockup can almost certainly be
triggered in some other way too.  I will not accept this side-stepping
of the issue.

> diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
> index e94159507847..aa18ea25d91f 100644
> --- a/drivers/net/ethernet/aurora/nb8800.c
> +++ b/drivers/net/ethernet/aurora/nb8800.c
> @@ -39,6 +39,7 @@
>
>  #include "nb8800.h"
>
> +static void nb8800_init(struct net_device *dev);
>  static void nb8800_tx_done(struct net_device *dev);
>  static int nb8800_dma_stop(struct net_device *dev);
>
> @@ -957,6 +958,8 @@ static int nb8800_open(struct net_device *dev)
>  	struct phy_device *phydev;
>  	int err;
>
> +	nb8800_init(dev);
> +
>  	/* clear any pending interrupts */
>  	nb8800_writel(priv, NB8800_RXC_SR, 0xf);
>  	nb8800_writel(priv, NB8800_TXC_SR, 0xf);
> @@ -1246,11 +1249,6 @@ static int nb8800_hw_init(struct net_device *dev)
>  	nb8800_writeb(priv, NB8800_PQ1, val >> 8);
>  	nb8800_writeb(priv, NB8800_PQ2, val & 0xff);
>
> -	/* Auto-negotiate by default */
> -	priv->pause_aneg = true;
> -	priv->pause_rx = true;
> -	priv->pause_tx = true;
> -
>  	nb8800_mc_init(dev, 0);
>
>  	return 0;
> @@ -1350,6 +1348,20 @@ static const struct of_device_id nb8800_dt_ids[] = {
>  };
>  MODULE_DEVICE_TABLE(of, nb8800_dt_ids);
>
> +static void nb8800_init(struct net_device *dev)
> +{
> +	struct nb8800_priv *priv = netdev_priv(dev);
> +	const struct nb8800_ops *ops = priv->ops;
> +
> +	if (ops && ops->reset)
> +		ops->reset(dev);
> +	nb8800_hw_init(dev);
> +	if (ops && ops->init)
> +		ops->init(dev);
> +	nb8800_update_mac_addr(dev);
> +	priv->speed = 0;
> +}
> +
>  static int nb8800_probe(struct platform_device *pdev)
>  {
>  	const struct of_device_id *match;
> @@ -1389,6 +1401,7 @@ static int nb8800_probe(struct platform_device *pdev)
>
>  	priv = netdev_priv(dev);
>  	priv->base = base;
> +	priv->ops = ops;
>
>  	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
>  	if (priv->phy_mode < 0)
> @@ -1407,12 +1420,6 @@ static int nb8800_probe(struct platform_device *pdev)
>
>  	spin_lock_init(&priv->tx_lock);
>
> -	if (ops && ops->reset) {
> -		ret = ops->reset(dev);
> -		if (ret)
> -			goto err_disable_clk;
> -	}
> -
>  	bus = devm_mdiobus_alloc(&pdev->dev);
>  	if (!bus) {
>  		ret = -ENOMEM;
> @@ -1454,21 +1461,16 @@ static int nb8800_probe(struct platform_device *pdev)
>
>  	priv->mii_bus = bus;
>
> -	ret = nb8800_hw_init(dev);
> -	if (ret)
> -		goto err_deregister_fixed_link;
> -
> -	if (ops && ops->init) {
> -		ret = ops->init(dev);
> -		if (ret)
> -			goto err_deregister_fixed_link;
> -	}
> -
>  	dev->netdev_ops = &nb8800_netdev_ops;
>  	dev->ethtool_ops = &nb8800_ethtool_ops;
>  	dev->flags |= IFF_MULTICAST;
>  	dev->irq = irq;
>
> +	/* Auto-negotiate by default */
> +	priv->pause_aneg = true;
> +	priv->pause_rx = true;
> +	priv->pause_tx = true;
> +
>  	mac = of_get_mac_address(pdev->dev.of_node);
>  	if (mac)
>  		ether_addr_copy(dev->dev_addr, mac);
> @@ -1476,14 +1478,12 @@ static int nb8800_probe(struct platform_device *pdev)
>  	if (!is_valid_ether_addr(dev->dev_addr))
>  		eth_hw_addr_random(dev);
>
> -	nb8800_update_mac_addr(dev);
> -
>  	netif_carrier_off(dev);
>
>  	ret = register_netdev(dev);
>  	if (ret) {
>  		netdev_err(dev, "failed to register netdev\n");
> -		goto err_free_dma;
> +		goto err_deregister_fixed_link;
>  	}
>
>  	netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT);
> @@ -1492,8 +1492,6 @@ static int nb8800_probe(struct platform_device *pdev)
>
>  	return 0;
>
> -err_free_dma:
> -	nb8800_dma_free(dev);
>  err_deregister_fixed_link:
>  	if (of_phy_is_fixed_link(pdev->dev.of_node))
>  		of_phy_deregister_fixed_link(pdev->dev.of_node);
> diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
> index 6ec4a956e1e5..d5f4481a2c7b 100644
> --- a/drivers/net/ethernet/aurora/nb8800.h
> +++ b/drivers/net/ethernet/aurora/nb8800.h
> @@ -305,6 +305,7 @@ struct nb8800_priv {
>  	dma_addr_t			tx_desc_dma;
>
>  	struct clk			*clk;
> +	const struct nb8800_ops		*ops;
>  };
>
>  struct nb8800_ops {
> -- 
> 2.11.0
Mason Aug. 2, 2017, 11:54 a.m. UTC | #2
On 02/08/2017 13:02, Måns Rullgård wrote:

> Mason wrote:
> 
>> Move all HW initializations to nb8800_init.
>> This provides the basis for suspend/resume support.
>> ---
>>  drivers/net/ethernet/aurora/nb8800.c | 50 +++++++++++++++++-------------------
>>  drivers/net/ethernet/aurora/nb8800.h |  1 +
>>  2 files changed, 25 insertions(+), 26 deletions(-)
> 
> You're still not doing anything to preserve flow control and multicast
> configuration, and those are probably not the only ones.

I did handle flow control (as far as I can tell).
The current settings are stored in:
priv->pause_aneg, priv->pause_rx, priv->pause_tx
and nb8800_pause_config() is called from nb8800_link_reconfigure()

I'll take a closer look at multicast settings.

> Worse, you're now never tearing down dma properly, which means the
> hardware can be left with active pointers to memory no longer allocated
> to it.

You're making it sound like there is a risk those pointers
might be used somehow. There is no such risk AFAICT.
The PHY is disconnected, NAPI is stopped, RX and TX have
been disabled, and the ISR has been removed.

I have considered putting the HW block in reset (clock gated)
at the end of nb8800_stop() to save power, which would make
the issue you point out moot.

The reason I removed nb8800_dma_stop() in nb8800_stop()
is because it hangs when called from nb8800_suspend().
(It requires interrupts to make progress, and interrupts
seem to be disabled when nb8800_suspend() is called.)


Broader question for netdev:

I've been wondering about costly close operations.
If closing a device is complex (in terms of code), at which
point does it become simpler to just reset the block, and
avoid writing/maintaining/debugging the code performing
said close operation?

> Finally, you still haven't explained why the hw needs to be reset in
> ndo_open().  Whatever is causing your lockup can almost certainly be
> triggered in some other way too.  I will not accept this side-stepping
> of the issue.

(I was not aware that you were the final authority on which
patches are accepted upstream.)

FWIW, I have placed a formal request for the HW dev to
investigate, as I could not reproduce on tango4, despite
several days wasted on this issue.

In the mean time, the driver in its current form does not
support suspend/resume. How would *you* implement it?
(IMO, my way is great in its simplicity and code reuse.)

Regards.
Andrew Lunn Aug. 2, 2017, 1:54 p.m. UTC | #3
> > Finally, you still haven't explained why the hw needs to be reset in
> > ndo_open().  Whatever is causing your lockup can almost certainly be
> > triggered in some other way too.  I will not accept this side-stepping
> > of the issue.
> 
> (I was not aware that you were the final authority on which
> patches are accepted upstream.)

./scripts/get_maintainer.pl -f drivers/net/ethernet/aurora/nb8800.c
"David S. Miller" <davem@davemloft.net> (commit_signer:6/6=100%)
Mans Rullgard <mans@mansr.com> (commit_signer:2/6=33%)
Jarod Wilson <jarod@redhat.com> (commit_signer:1/6=17%,authored:1/6=17%,removed_lines:1/17=6%)
Stephen Hemminger <stephen@networkplumber.org> (commit_signer:1/6=17%)
Florian Fainelli <f.fainelli@gmail.com> (commit_signer:1/6=17%,added_lines:1/14=7%,removed_lines:11/17=65%)
Javier Martinez Canillas <javier@osg.samsung.com> (authored:1/6=17%,added_lines:1/14=7%)
Johan Hovold <johan@kernel.org> (authored:1/6=17%,added_lines:7/14=50%,removed_lines:2/17=12%)
Johannes Berg <johannes.berg@intel.com> (authored:1/6=17%,added_lines:2/14=14%,removed_lines:2/17=12%)
Wei Yongjun <weiyongjun1@huawei.com> (authored:1/6=17%,added_lines:3/14=21%,removed_lines:1/17=6%)

David is very likely to listen to Mans's recommendations, since Mans
has done most of the review work for this driver.

      Andrew
Mans Rullgard Aug. 2, 2017, 2:33 p.m. UTC | #4
Mason <slash.tmp@free.fr> writes:

> On 02/08/2017 13:02, Måns Rullgård wrote:
>
>> Mason wrote:
>> 
>>> Move all HW initializations to nb8800_init.
>>> This provides the basis for suspend/resume support.
>>> ---
>>>  drivers/net/ethernet/aurora/nb8800.c | 50 +++++++++++++++++-------------------
>>>  drivers/net/ethernet/aurora/nb8800.h |  1 +
>>>  2 files changed, 25 insertions(+), 26 deletions(-)
>> 
>> You're still not doing anything to preserve flow control and multicast
>> configuration, and those are probably not the only ones.
>
> I did handle flow control (as far as I can tell).
> The current settings are stored in:
> priv->pause_aneg, priv->pause_rx, priv->pause_tx
> and nb8800_pause_config() is called from nb8800_link_reconfigure()

I think the whole pause setup needs to be revisited.  It's a mess.
Maybe I'll find the time one day.

> I'll take a closer look at multicast settings.

You're currently losing all multicast setup as well as promiscuous mode
if that has been enabled.

>> Worse, you're now never tearing down dma properly, which means the
>> hardware can be left with active pointers to memory no longer allocated
>> to it.
>
> You're making it sound like there is a risk those pointers
> might be used somehow. There is no such risk AFAICT.
> The PHY is disconnected, NAPI is stopped, RX and TX have
> been disabled, and the ISR has been removed.

The interrupt handler and NAPI have nothing to do with it since they
only get involved *after* the hw has filled the buffers.  If you've
given the hardware control of a memory buffer, you should damn well take
it back before reusing that memory for something else.  Otherwise you'll
eventually have a very difficult to debug problem and a security risk.

> I have considered putting the HW block in reset (clock gated)
> at the end of nb8800_stop() to save power, which would make
> the issue you point out moot.

That's not necessarily possible.  It is on Sigma SoCs, but it doesn't
have to be.

> The reason I removed nb8800_dma_stop() in nb8800_stop()
> is because it hangs when called from nb8800_suspend().
> (It requires interrupts to make progress, and interrupts
> seem to be disabled when nb8800_suspend() is called.)

It shouldn't require interrupts.  If it does for some reason, that
should be fixed.

> Broader question for netdev:
>
> I've been wondering about costly close operations.
> If closing a device is complex (in terms of code), at which
> point does it become simpler to just reset the block, and
> avoid writing/maintaining/debugging the code performing
> said close operation?
>
>> Finally, you still haven't explained why the hw needs to be reset in
>> ndo_open().  Whatever is causing your lockup can almost certainly be
>> triggered in some other way too.  I will not accept this side-stepping
>> of the issue.
>
> (I was not aware that you were the final authority on which
> patches are accepted upstream.)
>
> FWIW, I have placed a formal request for the HW dev to
> investigate, as I could not reproduce on tango4, despite
> several days wasted on this issue.
>
> In the mean time, the driver in its current form does not
> support suspend/resume. How would *you* implement it?

I'm not saying it's a bad idea for suspend.  It might even be the only
way.
diff mbox

Patch

diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index e94159507847..aa18ea25d91f 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -39,6 +39,7 @@ 
 
 #include "nb8800.h"
 
+static void nb8800_init(struct net_device *dev);
 static void nb8800_tx_done(struct net_device *dev);
 static int nb8800_dma_stop(struct net_device *dev);
 
@@ -957,6 +958,8 @@  static int nb8800_open(struct net_device *dev)
 	struct phy_device *phydev;
 	int err;
 
+	nb8800_init(dev);
+
 	/* clear any pending interrupts */
 	nb8800_writel(priv, NB8800_RXC_SR, 0xf);
 	nb8800_writel(priv, NB8800_TXC_SR, 0xf);
@@ -1246,11 +1249,6 @@  static int nb8800_hw_init(struct net_device *dev)
 	nb8800_writeb(priv, NB8800_PQ1, val >> 8);
 	nb8800_writeb(priv, NB8800_PQ2, val & 0xff);
 
-	/* Auto-negotiate by default */
-	priv->pause_aneg = true;
-	priv->pause_rx = true;
-	priv->pause_tx = true;
-
 	nb8800_mc_init(dev, 0);
 
 	return 0;
@@ -1350,6 +1348,20 @@  static const struct of_device_id nb8800_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, nb8800_dt_ids);
 
+static void nb8800_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	const struct nb8800_ops *ops = priv->ops;
+
+	if (ops && ops->reset)
+		ops->reset(dev);
+	nb8800_hw_init(dev);
+	if (ops && ops->init)
+		ops->init(dev);
+	nb8800_update_mac_addr(dev);
+	priv->speed = 0;
+}
+
 static int nb8800_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
@@ -1389,6 +1401,7 @@  static int nb8800_probe(struct platform_device *pdev)
 
 	priv = netdev_priv(dev);
 	priv->base = base;
+	priv->ops = ops;
 
 	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
 	if (priv->phy_mode < 0)
@@ -1407,12 +1420,6 @@  static int nb8800_probe(struct platform_device *pdev)
 
 	spin_lock_init(&priv->tx_lock);
 
-	if (ops && ops->reset) {
-		ret = ops->reset(dev);
-		if (ret)
-			goto err_disable_clk;
-	}
-
 	bus = devm_mdiobus_alloc(&pdev->dev);
 	if (!bus) {
 		ret = -ENOMEM;
@@ -1454,21 +1461,16 @@  static int nb8800_probe(struct platform_device *pdev)
 
 	priv->mii_bus = bus;
 
-	ret = nb8800_hw_init(dev);
-	if (ret)
-		goto err_deregister_fixed_link;
-
-	if (ops && ops->init) {
-		ret = ops->init(dev);
-		if (ret)
-			goto err_deregister_fixed_link;
-	}
-
 	dev->netdev_ops = &nb8800_netdev_ops;
 	dev->ethtool_ops = &nb8800_ethtool_ops;
 	dev->flags |= IFF_MULTICAST;
 	dev->irq = irq;
 
+	/* Auto-negotiate by default */
+	priv->pause_aneg = true;
+	priv->pause_rx = true;
+	priv->pause_tx = true;
+
 	mac = of_get_mac_address(pdev->dev.of_node);
 	if (mac)
 		ether_addr_copy(dev->dev_addr, mac);
@@ -1476,14 +1478,12 @@  static int nb8800_probe(struct platform_device *pdev)
 	if (!is_valid_ether_addr(dev->dev_addr))
 		eth_hw_addr_random(dev);
 
-	nb8800_update_mac_addr(dev);
-
 	netif_carrier_off(dev);
 
 	ret = register_netdev(dev);
 	if (ret) {
 		netdev_err(dev, "failed to register netdev\n");
-		goto err_free_dma;
+		goto err_deregister_fixed_link;
 	}
 
 	netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT);
@@ -1492,8 +1492,6 @@  static int nb8800_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_free_dma:
-	nb8800_dma_free(dev);
 err_deregister_fixed_link:
 	if (of_phy_is_fixed_link(pdev->dev.of_node))
 		of_phy_deregister_fixed_link(pdev->dev.of_node);
diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
index 6ec4a956e1e5..d5f4481a2c7b 100644
--- a/drivers/net/ethernet/aurora/nb8800.h
+++ b/drivers/net/ethernet/aurora/nb8800.h
@@ -305,6 +305,7 @@  struct nb8800_priv {
 	dma_addr_t			tx_desc_dma;
 
 	struct clk			*clk;
+	const struct nb8800_ops		*ops;
 };
 
 struct nb8800_ops {