diff mbox series

[net-next,v4,05/12] net: ethernet: oa_tc6: implement error interrupts unmasking

Message ID 20240418125648.372526-6-Parthiban.Veerasooran@microchip.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Add support for OPEN Alliance 10BASE-T1x MACPHY Serial Interface | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/apply fail Patch does not apply to net-next-1

Commit Message

Parthiban Veerasooran April 18, 2024, 12:56 p.m. UTC
This will unmask the following error interrupts from the MAC-PHY.
  tx protocol error
  rx buffer overflow error
  loss of framing error
  header error
The MAC-PHY will signal an error by setting the EXST bit in the receive
data footer which will then allow the host to read the STATUS0 register
to find the source of the error.

Signed-off-by: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
---
 drivers/net/ethernet/oa_tc6.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

Comments

Andrew Lunn April 23, 2024, 11:27 p.m. UTC | #1
On Thu, Apr 18, 2024 at 06:26:41PM +0530, Parthiban Veerasooran wrote:
> This will unmask the following error interrupts from the MAC-PHY.
>   tx protocol error
>   rx buffer overflow error
>   loss of framing error
>   header error
> The MAC-PHY will signal an error by setting the EXST bit in the receive
> data footer which will then allow the host to read the STATUS0 register
> to find the source of the error.
> 
> Signed-off-by: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew
Ramón Nordin Rodriguez April 27, 2024, 7:52 p.m. UTC | #2
> +static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
> +{
> +	u32 regval;
> +	int ret;
> +
> +	ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval);
> +	if (ret)
> +		return ret;
> +
> +	regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
> +		    INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
> +		    INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
> +		    INT_MASK0_HEADER_ERR_MASK);
> +
> +	return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
> +}
> +

This togheter with patch 11 works poorly for me. I get alot of kernel
output, dropped packets and lower performance.
Below is an example for a run when I curl a 10MB blob

time curl 20.0.0.55:8000/rdump -o dump -w '{%speed_download}'
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  U[  387.944737] net_ratelimit: 38 callbacks suppressed
pload   Total   Spent    Left  Sp[  387.944755] eth0: Receive buffer overflow error
eed
  0     0    0     0    0     0      0      0 --:--:-- --:-[  387.961424] eth0: Receive buffer overflow error
  0 10.0M    0  2896    0     0  13031      0  0:13:24 --:--:--  0:13:24 12986[  388.204257] eth0: Receive buffer overflow error
[  388.209848] eth0: Receive buffer overflow error
[  388.240705] eth0: Receive buffer overflow error
[  388.246205] eth0: Receive buffer overflow error
[  388.360265] eth0: Receive buffer overflow error
[  388.365755] eth0: Receive buffer overflow error
[  388.371328] eth0: Receive buffer overflow error
[  388.396937] eth0: Receive buffer overflow error
 32 10.0M   32 3362k    0     0   826k      0  0:00:12  0:00:04  0:00:08  826k[  392.950696] net_ratelimit: 84 callbacks suppressed
[  392.950711] eth0: Receive buffer overflow error
 41 10.0M   41 4259k    0     0   840k      0  0:00:12  0:00:05  0:00:07  878k[  393.009785] eth0: Receive buffer overflow error
[  393.016651] eth0: Receive buffer overflow error
[  393.121278] eth0: Receive buffer overflow error
[  393.126876] eth0: Receive buffer overflow error
[  393.204983] eth0: Receive buffer overflow error
[  393.210590] eth0: Receive buffer overflow error
[  393.248977] eth0: Receive buffer overflow error
[  393.254575] eth0: Receive buffer overflow error
[  393.352949] eth0: Receive buffer overflow error
 77 10.0M   77 7903k    0     0   870k      0  0:00:11  0:00:09  0:00:02  906k[  397.956674] net_ratelimit: 88 callbacks suppressed
[  397.956691] eth0: Receive buffer overflow error
[  397.967182] eth0: Receive buffer overflow error
 86 10.0M   86 8837k    0     0   877k      0  0:00:11  0:00:10  0:00:01  915k[  398.048630] eth0: Receive buffer overflow error
[  398.054171] eth0: Receive buffer overflow error
[  398.092858] eth0: Receive buffer overflow error
[  398.113975] eth0: Receive buffer overflow error
[  398.177558] eth0: Receive buffer overflow error
[  398.209782] eth0: Receive buffer overflow error
[  398.272621] eth0: Receive buffer overflow error
[  398.278306] eth0: Receive buffer overflow error
100 10.0M  100 10.0M    0     0   877k      0  0:00:11  0:00:11 --:--:--  921k
{%speed_download}
real    0m11.681s
user    0m0.117s
sys     0m0.226s

I tried this patch

diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index 9f17f3712137..bd7bd3ef6897 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -615,21 +615,9 @@ static int oa_tc6_sw_reset_macphy(struct oa_tc6 *tc6)
        return oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, regval);
 }

-static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
+static int oa_tc6_disable_imask0_interrupts(struct oa_tc6 *tc6)
 {
-       u32 regval;
-       int ret;
-
-       ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval);
-       if (ret)
-               return ret;
-
-       regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
-                   INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
-                   INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
-                   INT_MASK0_HEADER_ERR_MASK);
-
-       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
+       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, (u32)-1);
 }

 static int oa_tc6_enable_data_transfer(struct oa_tc6 *tc6)
@@ -1234,7 +1222,7 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev)
                return NULL;
        }

-       ret = oa_tc6_unmask_macphy_error_interrupts(tc6);
+       ret = oa_tc6_disable_imask0_interrupts(tc6);
        if (ret) {
                dev_err(&tc6->spi->dev,
                        "MAC-PHY error interrupts unmask failed: %d\n", ret);

Which results in no log spam, ~5-10% higher throughput and no dropped
packets when I look at /sys/class/net/eth0/statistics/rx_dropped

Wheras when I did an equivalent run with a 100MB file I got 1918
rx_dropped with interrupts enabled.

When I dig through wireshark I can see some retransmissions when
interrupts are disabled, but the network stack seems to handle that with
grace.

Maybe the decision on wheter to enable interrupts or not should be on
the mac driver level, not network framework level?
If this really is preferrable for some reason, could a module option be
added so that it does not require running a fork to disable interrupts?

There is also a interrupt mask 1 register that is not touched in this
patch series, and since things started working better for me with just
touching reg0 I didn't bother tweaking anything int reg1.

BR
Ramón
Andrew Lunn April 27, 2024, 9:17 p.m. UTC | #3
On Sat, Apr 27, 2024 at 09:52:15PM +0200, Ramón Nordin Rodriguez wrote:
> > +static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
> > +{
> > +	u32 regval;
> > +	int ret;
> > +
> > +	ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval);
> > +	if (ret)
> > +		return ret;
> > +
> > +	regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
> > +		    INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
> > +		    INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
> > +		    INT_MASK0_HEADER_ERR_MASK);
> > +
> > +	return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
> > +}
> > +
> 
> This togheter with patch 11 works poorly for me. I get alot of kernel
> output, dropped packets and lower performance.
> Below is an example for a run when I curl a 10MB blob
> 
> time curl 20.0.0.55:8000/rdump -o dump -w '{%speed_download}'
>   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
>                                  Dload  U[  387.944737] net_ratelimit: 38 callbacks suppressed
> pload   Total   Spent    Left  Sp[  387.944755] eth0: Receive buffer overflow error
> eed
>   0     0    0     0    0     0      0      0 --:--:-- --:-[  387.961424] eth0: Receive buffer overflow error
>   0 10.0M    0  2896    0     0  13031      0  0:13:24 --:--:--  0:13:24 12986[  388.204257] eth0: Receive buffer overflow error
> [  388.209848] eth0: Receive buffer overflow error

How fast is your SPI bus? Faster than the link speed? Or slower?

It could be different behaviour is needed depending on the SPI bus
speed. If the SPI bus is faster than the link speed, by some margin,
the receiver buffer should not overflow, since the CPU can empty the
buffer faster than it fills.

If however, the SPI bus is slower than the link speed, there will be
buffer overflows, and a reliance on TCP backing off and slowing down.
The driver should not be spamming the log, since it is going to happen
and there is nothing that can be done about it.

> I tried this patch
> 
> diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
> index 9f17f3712137..bd7bd3ef6897 100644
> --- a/drivers/net/ethernet/oa_tc6.c
> +++ b/drivers/net/ethernet/oa_tc6.c
> @@ -615,21 +615,9 @@ static int oa_tc6_sw_reset_macphy(struct oa_tc6 *tc6)
>         return oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, regval);
>  }
> 
> -static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
> +static int oa_tc6_disable_imask0_interrupts(struct oa_tc6 *tc6)
>  {
> -       u32 regval;
> -       int ret;
> -
> -       ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval);
> -       if (ret)
> -               return ret;
> -
> -       regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
> -                   INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
> -                   INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
> -                   INT_MASK0_HEADER_ERR_MASK);
> -
> -       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
> +       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, (u32)-1);

So this appears to be disabling all error interrupts?

This is maybe going too far. Overflow errors are going to happen if
you have a slow SPI bus. So i probably would not enable that. However,
are the other errors actually expected in normal usage? If not, leave
them enabled, because they might indicate real problems.

> Which results in no log spam, ~5-10% higher throughput and no dropped
> packets when I look at /sys/class/net/eth0/statistics/rx_dropped

You cannot trust rx_dropped because you just disabled the code which
increments it! The device is probably still dropping packets, and they
are no longer counted.

It could be the performance increase comes from two places:

1) Spending time and bus bandwidth dealing with the buffer overflow
interrupt

2) Printing out the serial port.

Please could you benchmark a few things:

1) Remove the printk("Receive buffer overflow error"), but otherwise
keep the code the same. That will give us an idea how much the serial
port matters.

2) Disable only the RX buffer overflow interrupt

3) Disable all error interrupts.

   Andrew
Ramón Nordin Rodriguez April 27, 2024, 9:55 p.m. UTC | #4
> How fast is your SPI bus? Faster than the link speed? Or slower?
> 
> It could be different behaviour is needed depending on the SPI bus
> speed. If the SPI bus is faster than the link speed, by some margin,
> the receiver buffer should not overflow, since the CPU can empty the
> buffer faster than it fills.

I'm running at 25MHz, I'm guessing that should translate to fast enough
for the 10MBit half duplex link.
But I'm not sure how the spi clock translates to bps here.

> 
> If however, the SPI bus is slower than the link speed, there will be
> buffer overflows, and a reliance on TCP backing off and slowing down.
> The driver should not be spamming the log, since it is going to happen
> and there is nothing that can be done about it.
> 

I agree, I think the print could be a DBG if deemed necessary, but there
is also the dropped counter to look at.

> > I tried this patch
> > 
> > diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
> > index 9f17f3712137..bd7bd3ef6897 100644
> > --- a/drivers/net/ethernet/oa_tc6.c
> > +++ b/drivers/net/ethernet/oa_tc6.c
> > @@ -615,21 +615,9 @@ static int oa_tc6_sw_reset_macphy(struct oa_tc6 *tc6)
> >         return oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, regval);
> >  }
> > 
> > -static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
> > +static int oa_tc6_disable_imask0_interrupts(struct oa_tc6 *tc6)
> >  {
> > -       u32 regval;
> > -       int ret;
> > -
> > -       ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval);
> > -       if (ret)
> > -               return ret;
> > -
> > -       regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
> > -                   INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
> > -                   INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
> > -                   INT_MASK0_HEADER_ERR_MASK);
> > -
> > -       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
> > +       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, (u32)-1);
> 
> So this appears to be disabling all error interrupts?

Yes, and I think you are right in that it's an overcorrection. There is
a secondary interrupt mask register as well that is not touched by the
driver, so that's left at whatever the chip defaults to on reset.

> 
> This is maybe going too far. Overflow errors are going to happen if
> you have a slow SPI bus. So i probably would not enable that. However,
> are the other errors actually expected in normal usage? If not, leave
> them enabled, because they might indicate real problems.

I'm guessing you are right and that the others actually would be
meningful to log.
There is a nested question here as well, and that is wheter to keep or
drop the code that drops the rx buffer on overflow interrupt.
I think not dropping the full buffer could be one of the reasons for the
perf change.

> 
> > Which results in no log spam, ~5-10% higher throughput and no dropped
> > packets when I look at /sys/class/net/eth0/statistics/rx_dropped
> 
> You cannot trust rx_dropped because you just disabled the code which
> increments it! The device is probably still dropping packets, and they
> are no longer counted.

I'll curb my enthusiasm a bit :)

> 
> It could be the performance increase comes from two places:
> 
> 1) Spending time and bus bandwidth dealing with the buffer overflow
> interrupt
> 
> 2) Printing out the serial port.
> 

I think it's possible that the buffer cleanup triggered after the
overflow interrupt hits could be cause 3) here, but just a guess.

> Please could you benchmark a few things:
> 
> 1) Remove the printk("Receive buffer overflow error"), but otherwise
> keep the code the same. That will give us an idea how much the serial
> port matters.
> 
> 2) Disable only the RX buffer overflow interrupt
> 
> 3) Disable all error interrupts.
> 

Thanks for layout it out so clearly. I'll run through the scenarios!

R
Ramón Nordin Rodriguez April 28, 2024, 9:54 a.m. UTC | #5
> It could be the performance increase comes from two places:
> 
> 1) Spending time and bus bandwidth dealing with the buffer overflow
> interrupt
> 
> 2) Printing out the serial port.
> 
> Please could you benchmark a few things:
> 
> 1) Remove the printk("Receive buffer overflow error"), but otherwise
> keep the code the same. That will give us an idea how much the serial
> port matters.
> 
> 2) Disable only the RX buffer overflow interrupt
> 
> 3) Disable all error interrupts.
> 



Test setup
- Server side - 
PC with a lan8670 usb eval board running a http server that serves
a 10MB binary blob. The http server is just python3 -m http.server

- Client side -
iMX8mn board (quad core arm64) with lan8650 mac-phy (25MHz spi)
running curl to download the blob from the server and writing to
a ramfs (ddr4 1.xGHz, so should not be a bottleneck)

Below are a collection of samples of different test runs. All of the
test runs have run a minor patch for hw reset (else nothing works for me).
Using curl is not the most scientific approach here, but iperf has
not exposed any problems for me earlier with overflows.
So sticking with curl since it's easy and definetly gets the overflows.

n  |  name     |  min  |  avg  |  max  |  rx dropped  |  samples
1  |  no mod   |  827K |  846K |  891K |      945     |     5
2  |  no log   |  711K |  726K |  744K |      562     |     5
3  |  less irq |  815K |  833K |  846K |      N/A     |     5
4  |  no irq   |  914K |  924K |  931K |      N/A     |     5
5  |  simple   |  857K |  868K |  879K |      615     |     5

Description of each scenario

1 - no mod
So this just runs a hw reset to get the chip working (described in earlier posts)

2 - no log
This scenario just removes the logging when handling the irq state
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -688,8 +688,6 @@ static int oa_tc6_process_extended_status(struct oa_tc6 *tc6)
        if (FIELD_GET(STATUS0_RX_BUFFER_OVERFLOW_ERROR, value)) {
                tc6->rx_buf_overflow = true;
                oa_tc6_cleanup_ongoing_rx_skb(tc6);
-               net_err_ratelimited("%s: Receive buffer overflow error\n",
-                                   tc6->netdev->name);
                return -EAGAIN;
        }
        if (FIELD_GET(STATUS0_TX_PROTOCOL_ERROR, value)) {

3 - less irq
This scenario disables the overflow interrupt but keeps the others

--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -625,10 +625,10 @@ static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
                return ret;

        regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
-                   INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
                    INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
                    INT_MASK0_HEADER_ERR_MASK);

+       regval |= INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK;
        return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
 }

4 - no irq
This scenario disables all imask0 interrupts with the following change

diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index 9f17f3712137..88a9c6ccb37a 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -624,12 +624,7 @@ static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
        if (ret)
                return ret;

-       regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
-                   INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
-                   INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
-                   INT_MASK0_HEADER_ERR_MASK);
-
-       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
+       return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, (u32)-1);
 }

 static int oa_tc6_enable_data_transfer(struct oa_tc6 *tc6)


5 - simple
This keeps the interrupt but does not log or drop the socket buffer on irq
Moving the rx dropped increment here makes it more of a irq counter I guess,
so maybe not relevant.

diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index 9f17f3712137..cbc20a725ad0 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -687,10 +687,7 @@ static int oa_tc6_process_extended_status(struct oa_tc6 *tc6)

        if (FIELD_GET(STATUS0_RX_BUFFER_OVERFLOW_ERROR, value)) {
                tc6->rx_buf_overflow = true;
-               oa_tc6_cleanup_ongoing_rx_skb(tc6);
-               net_err_ratelimited("%s: Receive buffer overflow error\n",
-                                   tc6->netdev->name);
-               return -EAGAIN;
+               tc6->netdev->stats.rx_dropped++;
        }
        if (FIELD_GET(STATUS0_TX_PROTOCOL_ERROR, value)) {
                netdev_err(tc6->netdev, "Transmit protocol error\n");


- postamble -

Removing the logging made things considerably slower which probably
indicates that there is a timing dependent behaviour in the driver.

I have a hard time explaining why there is a throughput difference
between scenario 3 and 4 since I did not get the logs that any of the
other interrupts happened.
Maybe the irq handling adds some unexpected context switching overhead.

My recommendation going forward would be to disable the rx buffer
overlow interrupt and removing any code related to handling of it.

R
Andrew Lunn April 28, 2024, 2:48 p.m. UTC | #6
On Sat, Apr 27, 2024 at 11:55:13PM +0200, Ramón Nordin Rodriguez wrote:
> > How fast is your SPI bus? Faster than the link speed? Or slower?
> > 
> > It could be different behaviour is needed depending on the SPI bus
> > speed. If the SPI bus is faster than the link speed, by some margin,
> > the receiver buffer should not overflow, since the CPU can empty the
> > buffer faster than it fills.
> 
> I'm running at 25MHz, I'm guessing that should translate to fast enough
> for the 10MBit half duplex link.
> But I'm not sure how the spi clock translates to bps here.

That seems plenty fast. Maybe you can get a bus pirate or similar
sniffing the bus. Maybe there are big gaps between the transfers for
some reason? Or the interrupt controller is very slow?

> I'm guessing you are right and that the others actually would be
> meningful to log.
> There is a nested question here as well, and that is wheter to keep or
> drop the code that drops the rx buffer on overflow interrupt.
> I think not dropping the full buffer could be one of the reasons for the
> perf change.

You need to look careful at what a buffer overflow means, as written
in the standard. Does it mean a chunk has been dropped from the frame
currently being transferred over the SPI bus? If so, you need to drop
the frame, because it is missing 64 bytes somewhere. That could happen
if the device has very minimal buffering and does cut through. So the
frame goes straight to the SPI bus while it is still being received
from the line. Or the device could have sufficient buffers to hold a
few full frames. It has run out of such buffers while receiving, and
so dropped the frame. You never see that frame over SPI because it has
already been discarded. If so, linux should not be dropping anything,
the device already has.

Given your 25Mhz bus speed, i think there at least two things wrong
here. Dropping frames is too noise, and potentially doing it wrong. I
also think there is something not optimal in your SPI master, because
25MHz should not have trouble with 10Mbps line speed.

	Andrew
Andrew Lunn April 28, 2024, 2:59 p.m. UTC | #7
> n  |  name     |  min  |  avg  |  max  |  rx dropped  |  samples
> 1  |  no mod   |  827K |  846K |  891K |      945     |     5
> 2  |  no log   |  711K |  726K |  744K |      562     |     5
> 3  |  less irq |  815K |  833K |  846K |      N/A     |     5
> 4  |  no irq   |  914K |  924K |  931K |      N/A     |     5
> 5  |  simple   |  857K |  868K |  879K |      615     |     5

That is odd.

Side question: What CONFIG_HZ= do you have? 100, 250, 1000?  Try
1000. I've seen problems where the driver wants to sleep for a short
time, but the CONFIG_HZ value limits how short a time it can actually
sleep. It ends up sleeping much longer than it wants.

	Andrew
Ramón Nordin Rodriguez April 28, 2024, 10:04 p.m. UTC | #8
On Sun, Apr 28, 2024 at 04:59:40PM +0200, Andrew Lunn wrote:
> > n  |  name     |  min  |  avg  |  max  |  rx dropped  |  samples
> > 1  |  no mod   |  827K |  846K |  891K |      945     |     5
> > 2  |  no log   |  711K |  726K |  744K |      562     |     5
> > 3  |  less irq |  815K |  833K |  846K |      N/A     |     5
> > 4  |  no irq   |  914K |  924K |  931K |      N/A     |     5
> > 5  |  simple   |  857K |  868K |  879K |      615     |     5
> 
> That is odd.
> 
> Side question: What CONFIG_HZ= do you have? 100, 250, 1000?  Try
> 1000. I've seen problems where the driver wants to sleep for a short
> time, but the CONFIG_HZ value limits how short a time it can actually
> sleep. It ends up sleeping much longer than it wants.
> 

Good catch, had it set to 250. I'll rebuild with CONFIG_HZ=1000 and
rerun the tests.
Ramón Nordin Rodriguez May 1, 2024, 6:29 p.m. UTC | #9
> > n  |  name     |  min  |  avg  |  max  |  rx dropped  |  samples
> > 1  |  no mod   |  827K |  846K |  891K |      945     |     5
> > 2  |  no log   |  711K |  726K |  744K |      562     |     5
> > 3  |  less irq |  815K |  833K |  846K |      N/A     |     5
> > 4  |  no irq   |  914K |  924K |  931K |      N/A     |     5
> > 5  |  simple   |  857K |  868K |  879K |      615     |     5
> 
> That is odd.
> 
> Side question: What CONFIG_HZ= do you have? 100, 250, 1000?  Try
> 1000. I've seen problems where the driver wants to sleep for a short
> time, but the CONFIG_HZ value limits how short a time it can actually
> sleep. It ends up sleeping much longer than it wants.
> 

I have been doing my best to abuse the link some more. In brief tweaking
CONFIG_HZ has some but limited effect. 
Saturating the link with the rx buffer interrupt enabled breaks the driver.
Saturating the link with the rx buffer interrupt disabled has poor
performance.

The following scenario has been tested. Both ends of the link run:
* server.py
* client.py

One end is an arm64 quad core running at 1.2GHz with the lan8650 macphy.
The other end is an amd 3950x running the lan8670 usb eval board.
Both systems should be fast enough that running python should not be a
limiting factor.

-- The test code --
server.py
#!/bin/env python3
import socket

def serve(sock: socket.socket):
    while True:
        client, addr = sock.accept()
        print(f'connection from: {addr}')
        while len(client.recv(2048)) > 0:
            pass
        print('client disconnected')
        client.close()

if __name__ == '__main__':
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(('0.0.0.0', 4040))
    sock.listen(1)
    serve(sock)
    print("something went wrong")

client.py
#!/bin/env python3
import socket
import sys

if __name__ == '__main__':
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((sys.argv[1], 4040))

    while True:
        sock.sendall(b'0'*2048)

-- test runs --
run 1 - all interrupts enabled
Time to failure:
1 min or less

Kernel output:
[   94.361312] sched: RT throttling activated

top output:
 PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
 145 root     -51   0       0      0      0 R  95.5   0.0   1:11.22 oa-tc6-spi-thread

link stats:
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
    link/ether 32:c2:7e:22:93:99 brd ff:ff:ff:ff:ff:ff
    RX:  bytes packets errors dropped  missed   mcast
       3371902    7186      0      48       0       0
    RX errors:  length    crc   frame    fifo overrun
                     0      0       0       0       0
    TX:  bytes packets errors dropped carrier collsns
      10341438    8071      0       0       0       0
    TX errors: aborted   fifo  window heartbt transns
                     0      0       0       0       1
state:
Completly borked, can't ping in or out, bringing the interface down then up
has no effect.
There is no SPI clock and no interrupts generated by the mac-phy.
The worker thread seems to have live locked.

-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
run 2 - RX_BUFFER_OVERLOW interrupt disabled

state:
Runs just fine but the oa-tc6-spi-thread is consuming 10-20% cpu
Ping times have increased from 1-2ms to 8-35ms


-- additional notes --
When tweaking CONFIG_HZ I do get some changes in behaviour, the cpu
consumption stays stable at 20%+-2 with CONFIG_HZ=250, when increased to
CONFIG_HZ=1000 it jumps up and down between 10-20%.

I don't have access to a logic analyzer but my old oscilloscope is
almost reliable. I could confirm that the spi clock is indeed running at
the expected 25MHz, but I could observe some gaps of up to 320µs so
that's 8k spi cycles spent doing something else.
These gaps were observed on the SPI clock and the macphy interrupt was
active for the same ammount of time(though this was measured independently
and not on the same trigger).
I've been drinking way to much coffe, so soldering is not gonna happen
today (shaky hands), but if it helps I can solder wires to attach both
probes to confirm that the gap in the SPI clock happens at the same time
or not as the interrupt is active.

I'd be keen on hearing what Microchips plans to address. If tracking
down performance issues is a priority I'll probably not spend any time
on it, if not then I'll definetly dig into it more.

Let me know if anything is unclear or if I can help out with anything
specific.

R
Andrew Lunn May 1, 2024, 7:58 p.m. UTC | #10
> Kernel output:
> [   94.361312] sched: RT throttling activated
> 
> top output:
>  PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
>  145 root     -51   0       0      0      0 R  95.5   0.0   1:11.22 oa-tc6-spi-thread
> 
> link stats:
> 3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
>     link/ether 32:c2:7e:22:93:99 brd ff:ff:ff:ff:ff:ff
>     RX:  bytes packets errors dropped  missed   mcast
>        3371902    7186      0      48       0       0
>     RX errors:  length    crc   frame    fifo overrun
>                      0      0       0       0       0
>     TX:  bytes packets errors dropped carrier collsns
>       10341438    8071      0       0       0       0
>     TX errors: aborted   fifo  window heartbt transns
>                      0      0       0       0       1
> state:
> Completly borked, can't ping in or out, bringing the interface down then up
> has no effect.

Completely borked is not good. 

> I'd be keen on hearing what Microchips plans to address. If tracking
> down performance issues is a priority I'll probably not spend any time
> on it, if not then I'll definetly dig into it more.

Borked ! = performance issue. This should be addressed. Lets see if
the python scripts are enough to be able to reproduce it.

We should not rule out differences in SPI drivers. So it might be you
need to debug this, if Microchip cannot reproduce it.

I would also suggest you enable the "kernel hacking" debug features
for finding deadlocks, bad spinlocks, stuck RT tasks etc. That might
give some clues.

	Andrew
Andrew Lunn May 1, 2024, 8:07 p.m. UTC | #11
> I don't have access to a logic analyzer but my old oscilloscope is
> almost reliable. I could confirm that the spi clock is indeed running at
> the expected 25MHz, but I could observe some gaps of up to 320µs so
> that's 8k spi cycles spent doing something else.
> These gaps were observed on the SPI clock and the macphy interrupt was
> active for the same ammount of time(though this was measured independently
> and not on the same trigger).
> I've been drinking way to much coffe, so soldering is not gonna happen
> today (shaky hands), but if it helps I can solder wires to attach both
> probes to confirm that the gap in the SPI clock happens at the same time
> or not as the interrupt is active.

What i expect you will see is that the interrupt line goes active. A
while later there is an SPI bus transfer to fetch the interrupt
status, and when that transfer completes the interrupt should go
inactive. But you will need your second channel to confirm this.

One question would be, is 320µs reasonable for an interrupt, and one
SPI transfer?

I would then expect a number of back to back transfers, each around 64
bytes in size, as it gets the received frame. The gaps between those
transfers will be interesting.

	Andrew
Parthiban Veerasooran May 2, 2024, 10:10 a.m. UTC | #12
Hi Ramon,

On 01/05/24 11:59 pm, Ramón Nordin Rodriguez wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
>>> n  |  name     |  min  |  avg  |  max  |  rx dropped  |  samples
>>> 1  |  no mod   |  827K |  846K |  891K |      945     |     5
>>> 2  |  no log   |  711K |  726K |  744K |      562     |     5
>>> 3  |  less irq |  815K |  833K |  846K |      N/A     |     5
>>> 4  |  no irq   |  914K |  924K |  931K |      N/A     |     5
>>> 5  |  simple   |  857K |  868K |  879K |      615     |     5
>>
>> That is odd.
>>
>> Side question: What CONFIG_HZ= do you have? 100, 250, 1000?  Try
>> 1000. I've seen problems where the driver wants to sleep for a short
>> time, but the CONFIG_HZ value limits how short a time it can actually
>> sleep. It ends up sleeping much longer than it wants.
>>
> 
> I have been doing my best to abuse the link some more. In brief tweaking
> CONFIG_HZ has some but limited effect.
> Saturating the link with the rx buffer interrupt enabled breaks the driver.
> Saturating the link with the rx buffer interrupt disabled has poor
> performance.
> 
> The following scenario has been tested. Both ends of the link run:
> * server.py
> * client.py
> 
> One end is an arm64 quad core running at 1.2GHz with the lan8650 macphy.
> The other end is an amd 3950x running the lan8670 usb eval board.
> Both systems should be fast enough that running python should not be a
> limiting factor.
> 
> -- The test code --
> server.py
> #!/bin/env python3
> import socket
> 
> def serve(sock: socket.socket):
>      while True:
>          client, addr = sock.accept()
>          print(f'connection from: {addr}')
>          while len(client.recv(2048)) > 0:
>              pass
>          print('client disconnected')
>          client.close()
> 
> if __name__ == '__main__':
>      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
>      sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
>      sock.bind(('0.0.0.0', 4040))
>      sock.listen(1)
>      serve(sock)
>      print("something went wrong")
> 
> client.py
> #!/bin/env python3
> import socket
> import sys
> 
> if __name__ == '__main__':
>      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
>      sock.connect((sys.argv[1], 4040))
> 
>      while True:
>          sock.sendall(b'0'*2048)
> 
> -- test runs --
> run 1 - all interrupts enabled
> Time to failure:
> 1 min or less
> 
> Kernel output:
> [   94.361312] sched: RT throttling activated
> 
> top output:
>   PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
>   145 root     -51   0       0      0      0 R  95.5   0.0   1:11.22 oa-tc6-spi-thread
> 
> link stats:
> 3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
>      link/ether 32:c2:7e:22:93:99 brd ff:ff:ff:ff:ff:ff
>      RX:  bytes packets errors dropped  missed   mcast
>         3371902    7186      0      48       0       0
>      RX errors:  length    crc   frame    fifo overrun
>                       0      0       0       0       0
>      TX:  bytes packets errors dropped carrier collsns
>        10341438    8071      0       0       0       0
>      TX errors: aborted   fifo  window heartbt transns
>                       0      0       0       0       1
> state:
> Completly borked, can't ping in or out, bringing the interface down then up
> has no effect.
> There is no SPI clock and no interrupts generated by the mac-phy.
> The worker thread seems to have live locked.
> 
> -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
> run 2 - RX_BUFFER_OVERLOW interrupt disabled
> 
> state:
> Runs just fine but the oa-tc6-spi-thread is consuming 10-20% cpu
> Ping times have increased from 1-2ms to 8-35ms
> 
> 
> -- additional notes --
> When tweaking CONFIG_HZ I do get some changes in behaviour, the cpu
> consumption stays stable at 20%+-2 with CONFIG_HZ=250, when increased to
> CONFIG_HZ=1000 it jumps up and down between 10-20%.
> 
> I don't have access to a logic analyzer but my old oscilloscope is
> almost reliable. I could confirm that the spi clock is indeed running at
> the expected 25MHz, but I could observe some gaps of up to 320µs so
> that's 8k spi cycles spent doing something else.
> These gaps were observed on the SPI clock and the macphy interrupt was
> active for the same ammount of time(though this was measured independently
> and not on the same trigger).
> I've been drinking way to much coffe, so soldering is not gonna happen
> today (shaky hands), but if it helps I can solder wires to attach both
> probes to confirm that the gap in the SPI clock happens at the same time
> or not as the interrupt is active.
> 
> I'd be keen on hearing what Microchips plans to address. If tracking
> down performance issues is a priority I'll probably not spend any time
> on it, if not then I'll definetly dig into it more.
I tried to reproduce the issue in my setup with your above applications.
But surprisingly I couldn't reproduce the issue you are facing.

One end is Raspberry Pi 4 with lan8651 MAC-PHY and the other end is 
Raspberry Pi 4 with EVB-LAN8670-USB Stick.

lan8651 MAC-PHY side:
---------------------

pi@raspberrypi:~/lan865x/v4_mainline $ python3 server.py
connection from: ('192.168.5.101', 46608)

Top output:
-----------
top - 14:28:14 up  1:14,  4 users,  load average: 0.68, 0.67, 0.63
Tasks: 201 total,   1 running, 200 sleeping,   0 stopped,   0 zombie
%Cpu(s):  0.2 us,  1.1 sy,  0.0 ni, 98.7 id,  0.0 wa,  0.0 hi,  0.1 si, 
0.0 st
MiB Mem :   7810.0 total,   7110.5 free,    392.3 used,    432.4 
buff/cache
MiB Swap:    100.0 total,    100.0 free,      0.0 used.   7417.7 avail Mem

     PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ 
COMMAND 

    7219 root     -51   0       0      0      0 S   2.7   0.0   1:13.11 
oa-tc6-spi-thread 

   35307 pi        20   0   16280   9216   5248 S   0.7   0.1   0:16.53 
python3 

   88831 pi        20   0   11728   4864   2816 R   0.3   0.1   0:00.22 
top 

   89819 vnc       20   0    2320    384    384 S   0.3   0.0   0:00.04 
sh 

       1 root      20   0  168652  11580   8436 S   0.0   0.1   0:03.85 
systemd 

       2 root      20   0       0      0      0 S   0.0   0.0   0:00.01 
kthreadd 

       3 root      20   0       0      0      0 S   0.0   0.0   0:00.00 
pool_workqueue_release 

       4 root       0 -20       0      0      0 I   0.0   0.0   0:00.00 
kworker/R-rcu_g 

       5 root       0 -20       0      0      0 I   0.0   0.0   0:00.00 
kworker/R-rcu_p 

       6 root       0 -20       0      0      0 I   0.0   0.0   0:00.00 
kworker/R-slub_ 

       7 root       0 -20       0      0      0 I   0.0   0.0   0:00.00 
kworker/R-netns 

      11 root      20   0       0      0      0 I   0.0   0.0   0:00.03 
kworker/u8:0-ext4-rsv-conversion 

      12 root       0 -20       0      0      0 I   0.0   0.0   0:00.00 
kworker/R-mm_pe

ifconfig output:
----------------
eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
         inet 192.168.5.100  netmask 255.255.255.0  broadcast 0.0.0.0
         ether 04:05:06:01:02:03  txqueuelen 1000  (Ethernet)
         RX packets 1589879  bytes 2391045582 (2.2 GiB)
         RX errors 0  dropped 0  overruns 0  frame 0
         TX packets 1022419  bytes 71954905 (68.6 MiB)
         TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
         device interrupt 55

dmesg output:
-------------
pi@raspberrypi:~/lan865x/v4_mainline $ dmesg
[  234.019968] LAN865X Rev.B0 Internal Phy spi0.0:00: attached PHY 
driver (mii_bus:phy_addr=spi0.0:00, irq=POLL)
[  234.069387] lan865x spi0.0 eth1: Link is Up - 10Mbps/Half - flow 
control off

EVB-LAN8670-USB stick side:
---------------------------
pi@pv-rpi-tp2:~/microchip/t1s-usb $ python3 client.py 192.168.5.100

ifconfig output:
----------------
eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
         inet 192.168.5.101  netmask 255.255.255.0  broadcast 192.168.5.255
         ether 00:1e:c0:d1:ca:39  txqueuelen 1000  (Ethernet)
         RX packets 1034606  bytes 58330335 (55.6 MiB)
         RX errors 0  dropped 0  overruns 0  frame 0
         TX packets 1722400  bytes 2628199197 (2.4 GiB)
         TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

Let me know if you want to try anything more.

Best regards,
Parthiban V
> 
> Let me know if anything is unclear or if I can help out with anything
> specific.
> 
> R
Ramón Nordin Rodriguez May 2, 2024, 10:19 a.m. UTC | #13
> I tried to reproduce the issue in my setup with your above applications.
> But surprisingly I couldn't reproduce the issue you are facing.
> 
> One end is Raspberry Pi 4 with lan8651 MAC-PHY and the other end is 
> Raspberry Pi 4 with EVB-LAN8670-USB Stick.
> 
> lan8651 MAC-PHY side:
> ---------------------
> 

Did you run both client and server on both ends? The previous tests I
have done have only sent traffic in one direction, which did not lead to
a live lock.
But both sending and receiving as fast as possible in both directions
trigger the problems on my end.
R
Parthiban Veerasooran May 3, 2024, 7:10 a.m. UTC | #14
On 02/05/24 3:49 pm, Ramón Nordin Rodriguez wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
>> I tried to reproduce the issue in my setup with your above applications.
>> But surprisingly I couldn't reproduce the issue you are facing.
>>
>> One end is Raspberry Pi 4 with lan8651 MAC-PHY and the other end is
>> Raspberry Pi 4 with EVB-LAN8670-USB Stick.
>>
>> lan8651 MAC-PHY side:
>> ---------------------
>>
> 
> Did you run both client and server on both ends? The previous tests I
> have done have only sent traffic in one direction, which did not lead to
> a live lock.
> But both sending and receiving as fast as possible in both directions
> trigger the problems on my end.
OK, today I executed both the client and server on both the sides. It is 
running more than an hour, it didn't crash. But there are some "Receive 
buffer overflow error" time to time in the dmesg. I don't know whether 
this is an expected behavior with this type of stress test.

top output:
-----------

pi@raspberrypi:~/lan865x/v4_mainline $ top

top - 19:47:07 up 54 min,  5 users,  load average: 1.23, 1.20, 1.14
Tasks: 205 total,   1 running, 204 sleeping,   0 stopped,   0 zombie
%Cpu(s):  0.3 us,  7.5 sy,  0.0 ni, 92.1 id,  0.0 wa,  0.0 hi,  0.2 si, 
0.0 st
MiB Mem :   7810.0 total,   7013.5 free,    402.6 used,    518.0 
buff/cache
MiB Swap:    100.0 total,    100.0 free,      0.0 used.   7407.5 avail Mem

     PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ 
COMMAND 

    7218 root     -51   0       0      0      0 D  28.6   0.0  13:51.12 
oa-tc6-spi-thread 

   63853 vnc       20   0    2320    384    384 S   0.7   0.0   0:00.23 
sh 

   65090 pi        20   0   11728   4992   2816 R   0.7   0.1   0:00.04 
top 

      17 root      20   0       0      0      0 I   0.3   0.0   0:01.74 
rcu_preempt 

      27 root      20   0       0      0      0 S   0.3   0.0   0:16.67 
ksoftirqd/2 

     268 root      20   0   50024  16132  15108 S   0.3   0.2   0:02.02 
systemd-journal 

     956 pi        20   0  699196  37896  29160 S   0.3   0.5   0:03.60 
wireplumber 

    7328 pi        20   0   16280   9216   5248 S   0.3   0.1   0:07.61 
python3 

       1 root      20   0  168692  11340   8320 S   0.0   0.1   0:03.67 
systemd 

       2 root      20   0       0      0      0 S   0.0   0.0   0:00.01 
kthreadd

ifconfig output:
----------------

eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
         inet 192.168.5.100  netmask 255.255.255.0  broadcast 0.0.0.0
         ether 04:05:06:01:02:03  txqueuelen 1000  (Ethernet)
         RX packets 1944541  bytes 909191168 (867.0 MiB)
         RX errors 0  dropped 2  overruns 0  frame 0
         TX packets 2402855  bytes 3129162624 (2.9 GiB)
         TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
         device interrupt 55

dmesg output:
-------------

[  285.482275] LAN865X Rev.B0 Internal Phy spi0.0:00: attached PHY 
driver (mii_bus:phy_addr=spi0.0:00, irq=POLL)
[  285.534760] lan865x spi0.0 eth1: Link is Up - 10Mbps/Half - flow 
control off
[  341.466221] eth1: Receive buffer overflow error
[  345.730222] eth1: Receive buffer overflow error
[  345.891126] eth1: Receive buffer overflow error
[  346.074220] eth1: Receive buffer overflow error
[  354.374223] eth1: Receive buffer overflow error
[  354.375422] eth1: Receive buffer overflow error
[  355.786219] eth1: Receive buffer overflow error
[  356.898221] eth1: Receive buffer overflow error
[  357.782219] eth1: Receive buffer overflow error
[  357.783194] eth1: Receive buffer overflow error
[  365.446224] eth1: Receive buffer overflow error
[  387.722221] eth1: Receive buffer overflow error
[  387.790218] eth1: Receive buffer overflow error
[  387.950224] eth1: Receive buffer overflow error
[  387.951199] eth1: Receive buffer overflow error
[  388.790219] eth1: Receive buffer overflow error
[  389.010220] eth1: Receive buffer overflow error
[  391.066221] eth1: Receive buffer overflow error
[  409.746225] eth1: Receive buffer overflow error
[  442.835278] eth1: Receive buffer overflow error
[  447.410222] eth1: Receive buffer overflow error
[  447.510220] eth1: Receive buffer overflow error
[  447.758219] eth1: Receive buffer overflow error
[  448.066221] eth1: Receive buffer overflow error
[  449.590220] eth1: Receive buffer overflow error
[  450.686221] eth1: Receive buffer overflow error
[  451.238222] eth1: Receive buffer overflow error
[  451.490220] eth1: Receive buffer overflow error
[  472.254221] eth1: Receive buffer overflow error
[  473.302222] eth1: Receive buffer overflow error
[  474.070220] eth1: Receive buffer overflow error
[  475.262220] eth1: Receive buffer overflow error
[  475.362223] eth1: Receive buffer overflow error
[  477.362224] eth1: Receive buffer overflow error
[  501.918221] eth1: Receive buffer overflow error
[  505.510219] eth1: Receive buffer overflow error
[  516.466220] eth1: Receive buffer overflow error
[  600.830221] eth1: Receive buffer overflow error
[  675.614219] eth1: Receive buffer overflow error
[  676.318223] eth1: Receive buffer overflow error
[  676.594223] eth1: Receive buffer overflow error
[  678.702219] eth1: Receive buffer overflow error
[  679.102219] eth1: Receive buffer overflow error
[  679.642218] eth1: Receive buffer overflow error
[  679.894218] eth1: Receive buffer overflow error
[  679.906224] eth1: Receive buffer overflow error
[  682.529386] eth1: Receive buffer overflow error
[  682.786221] eth1: Receive buffer overflow error
[  703.374220] eth1: Receive buffer overflow error
[  703.462219] eth1: Receive buffer overflow error
[  716.606222] eth1: Receive buffer overflow error
[  717.134219] eth1: Receive buffer overflow error
[  722.838219] eth1: Receive buffer overflow error
[  801.522220] eth1: Receive buffer overflow error
[  871.946226] eth1: Receive buffer overflow error
[ 1304.190220] eth1: Receive buffer overflow error
[ 1307.734221] eth1: Receive buffer overflow error
[ 1309.738224] eth1: Receive buffer overflow error
[ 1605.222219] eth1: Receive buffer overflow error
[ 1739.927121] eth1: Receive buffer overflow error
[ 2240.002213] eth1: Receive buffer overflow error
[ 2241.355378] eth1: Receive buffer overflow error
[ 2300.374218] eth1: Receive buffer overflow error
[ 2332.850214] eth1: Receive buffer overflow error
[ 2347.322215] eth1: Receive buffer overflow error
[ 2377.702216] eth1: Receive buffer overflow error
[ 2393.942214] eth1: Receive buffer overflow error
[ 2404.986214] eth1: Receive buffer overflow error
[ 2407.538216] eth1: Receive buffer overflow error
[ 2407.539190] eth1: Receive buffer overflow error
[ 2411.266213] eth1: Receive buffer overflow error
[ 2421.542220] eth1: Receive buffer overflow error
[ 2713.142214] eth1: Receive buffer overflow error
[ 3401.226215] eth1: Receive buffer overflow error
[ 3494.394214] eth1: Receive buffer overflow error
[ 3511.850214] eth1: Receive buffer overflow error


Best regards,
Parthiban V
> R
>
Andrew Lunn May 6, 2024, 1:21 a.m. UTC | #15
> [  285.482275] LAN865X Rev.B0 Internal Phy spi0.0:00: attached PHY 
> driver (mii_bus:phy_addr=spi0.0:00, irq=POLL)
> [  285.534760] lan865x spi0.0 eth1: Link is Up - 10Mbps/Half - flow 
> control off
> [  341.466221] eth1: Receive buffer overflow error
> [  345.730222] eth1: Receive buffer overflow error
> [  345.891126] eth1: Receive buffer overflow error
> [  346.074220] eth1: Receive buffer overflow error

Generally we only log real errors. Is a receive buffer overflow a real
error? I would say not. But it would be good to count it.

There was also the open question, what exactly does a receive buffer
overflow mean?

The spec says:

  9.2.8.11 RXBOE

  Receive Buffer Overflow Error. When set, this bit indicates that the
  receive buffer (from the network) has overflowed and receive frame
  data was lost.

Which is a bit ambiguous. I would hope it means the receiver has
dropped the packet. It will not be passed to the host. But other than
maybe count it, i don't think there is anything to do. But Ramón was
suggesting you actually drop the frame currently be transferred over
the SPI bus?

	Andrew
Piergiorgio Beruto May 6, 2024, 6:47 a.m. UTC | #16
HI all,

The RXBOE is basically a warning, indicating that for some reason the SPI host is not fast enough in retrieving frames from the device.
In my experience, this is typically caused by excessive latency of the SPI driver, or if you have an unoptimized network driver for the MACPHY.

And no, you would certainly NOT drop a frame that is transferred over the SPI bus. It is the frame at the ingress of the device that will be dropped (ie the one at the end of the queue, not the beginning)..
Just my 0.2cent

Thanks,
Piergiorgio

-----Original Message-----
From: Andrew Lunn <andrew@lunn.ch> 
Sent: 6 May, 2024 03:21
To: Parthiban.Veerasooran@microchip.com
Cc: ramon.nordin.rodriguez@ferroamp.se; davem@davemloft.net; edumazet@google.com; kuba@kernel.org; pabeni@redhat.com; horms@kernel.org; saeedm@nvidia.com; anthony.l.nguyen@intel.com; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; corbet@lwn.net; linux-doc@vger.kernel.org; robh+dt@kernel.org; krzysztof.kozlowski+dt@linaro.org; conor+dt@kernel.org; devicetree@vger.kernel.org; Horatiu.Vultur@microchip.com; ruanjinjie@huawei.com; Steen.Hegelund@microchip.com; vladimir.oltean@nxp.com; UNGLinuxDriver@microchip.com; Thorsten.Kummermehr@microchip.com; Piergiorgio Beruto <Pier.Beruto@onsemi.com>; Selvamani Rajagopal <Selvamani.Rajagopal@onsemi.com>; Nicolas.Ferre@microchip.com; benjamin.bigler@bernformulastudent.ch
Subject: Re: [PATCH net-next v4 05/12] net: ethernet: oa_tc6: implement error interrupts unmasking

[External Email]: This email arrived from an external source - Please exercise caution when opening any attachments or clicking on links.


> [  285.482275] LAN865X Rev.B0 Internal Phy spi0.0:00: attached PHY 
> driver (mii_bus:phy_addr=spi0.0:00, irq=POLL) [  285.534760] lan865x 
> spi0.0 eth1: Link is Up - 10Mbps/Half - flow control off [  
> 341.466221] eth1: Receive buffer overflow error [  345.730222] eth1: 
> Receive buffer overflow error [  345.891126] eth1: Receive buffer 
> overflow error [  346.074220] eth1: Receive buffer overflow error

Generally we only log real errors. Is a receive buffer overflow a real error? I would say not. But it would be good to count it.

There was also the open question, what exactly does a receive buffer overflow mean?

The spec says:

  9.2.8.11 RXBOE

  Receive Buffer Overflow Error. When set, this bit indicates that the
  receive buffer (from the network) has overflowed and receive frame
  data was lost.

Which is a bit ambiguous. I would hope it means the receiver has dropped the packet. It will not be passed to the host. But other than maybe count it, i don't think there is anything to do. But Ramón was suggesting you actually drop the frame currently be transferred over the SPI bus?

	Andrew
Ramón Nordin Rodriguez May 7, 2024, 12:44 p.m. UTC | #17
> The RXBOE is basically a warning, indicating that for some reason the SPI host is not fast enough in retrieving frames from the device.
> In my experience, this is typically caused by excessive latency of the SPI driver, or if you have an unoptimized network driver for the MACPHY.

Definetly tracks with what I'm seeing, got two macphys, one is sharing
the bus with a can transiever, getting more RX overflows on that
interface.

> Thanks,
> Piergiorgio
> 
> Which is a bit ambiguous. I would hope it means the receiver has dropped the packet. It will not be passed to the host. But other than maybe count it, i don't think there is anything to do. But Ramón was suggesting you actually drop the frame currently be transferred over the SPI bus?
> 
> 	Andrew

This was misscommunicated on my part. In my happy-hacking adventures I
got better throughput when dropping more chunks in the driver code.

--

Suddenly I can't reproduce the hang, I'll give it one more attempt today
though. Did some measurements with a better oscilloscope, I'm seeing a
periodic latency of ~500us for clearing the irq, typically it's
considerably shorter, and a periodic ~1ms silence on the wire.

But since I can't reproduce the hang this is just performance related.

R
Ramón Nordin Rodriguez May 13, 2024, 6:41 a.m. UTC | #18
> The RXBOE is basically a warning, indicating that for some reason the SPI host is not fast enough in retrieving frames from the device.
> In my experience, this is typically caused by excessive latency of the SPI driver, or if you have an unoptimized network driver for the MACPHY.
> 
> Thanks,
> Piergiorgio
> 
> 
> > [  285.482275] LAN865X Rev.B0 Internal Phy spi0.0:00: attached PHY 
> > driver (mii_bus:phy_addr=spi0.0:00, irq=POLL) [  285.534760] lan865x 
> > spi0.0 eth1: Link is Up - 10Mbps/Half - flow control off [  
> > 341.466221] eth1: Receive buffer overflow error [  345.730222] eth1: 
> > Receive buffer overflow error [  345.891126] eth1: Receive buffer 
> > overflow error [  346.074220] eth1: Receive buffer overflow error
> 
> Generally we only log real errors. Is a receive buffer overflow a real error? I would say not. But it would be good to count it.
> 	Andrew

Hi,

I've been busy throwing stuff at the wall until something sticks. I've
managed to narrow a few things down.
First and foremost, when running a periodic udp6 multicast in the
background I don't get a hang in the driver, or it becomes considerably
harder to provoke.

When I make sure that the bespoke Ferroamp upd server is not started
(which just joins a mcast group and sends a less than MTU packet every
~500ms and listens for incoming multicast messages in the same group),
it becomes very simple to get to a live-lock.

My steasp of reproducing is setting a ipv4 address on both ends of the
link, then running the follwing script on both ends using the other ends
ip as argument.

#!/bin/env python3
import socket
import sys

if __name__ == '__main__':
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    while True:
        sock.sendto(b'0'*2048, (sys.argv[1], 4040))

Neither ends opens a listening socket. I get to the live lock in 10s or
less.
I've enabled some debugging options but so far nothing seems to hit.
What I've been able to conclude is that there still is SPI
communication, the macphy interrupt is still pulled low, and the cpu
does the ack so that it's reset to inactive.
But from there it seems no data is passed up the network stack. Some
symptoms are

* net_device stats are no longer incremented
* can't ping
* can't connect to sockets on the board etc.
* cpu usage jumps to and stays at 100% for the worker thread

The worker thread is released by the irq handler and it does some of the
expected work, but not all.
I'm adding some instrumentation to the code in an effort to figure out
where things break apart.
It might be possible to catch it in gdb as well, but I think you only
get one try as the timing will be pretty borked after the first break.

R
Andrew Lunn May 13, 2024, 1 p.m. UTC | #19
> I've enabled some debugging options but so far nothing seems to hit.
> What I've been able to conclude is that there still is SPI
> communication, the macphy interrupt is still pulled low, and the cpu
> does the ack so that it's reset to inactive.

Is it doing this in an endless cycle?

Probably the debug tools are not showing anything because it is not
looping in just one location. It is a complex loop, interrupts
triggering a thread which runs to completion etc. So it looks like
normal behaviour.

If it is an endless cycle, it sounds like an interrupt storm. Some
interrupt bit is not getting cleared, so it immediately fires again as
soon as interrupts are enabled.

Is this your dual device board? Do you have both devices on the same
SPI bus? Do they share interrupt lines?

	Andrew
Ramón Nordin Rodriguez May 13, 2024, 1:50 p.m. UTC | #20
On Mon, May 13, 2024 at 03:00:48PM +0200, Andrew Lunn wrote:
> > I've enabled some debugging options but so far nothing seems to hit.
> > What I've been able to conclude is that there still is SPI
> > communication, the macphy interrupt is still pulled low, and the cpu
> > does the ack so that it's reset to inactive.
> 
> Is it doing this in an endless cycle?

Exactly, so what I'm seeing is when the driver livelocks the macphy is
periodically pulling the irq pin low, the driver clears the interrupt
and repeat.

> 
> Probably the debug tools are not showing anything because it is not
> looping in just one location. It is a complex loop, interrupts
> triggering a thread which runs to completion etc. So it looks like
> normal behaviour.

Gotcha. The 'do work' func called in the worker threads loop does run
and return, so I guess there is not much to trigger on.

> 
> If it is an endless cycle, it sounds like an interrupt storm. Some
> interrupt bit is not getting cleared, so it immediately fires again as
> soon as interrupts are enabled.

Good input. I'll add some instrumentation/stats for how many jiffies
have elapsed between releases of the worker thread and for the irq
handler. I can probably find a gpio to toggle as well if it's really
tight timings.

The irq pin is inactive/high for 100s of us to ms in the measurments
I've done. But I've been using multiple channels and not the fanciest
equipment so samplerates might be playing tricks, I'll rerun some tests
while only measuring the irq pin.

> 
> Is this your dual device board? Do you have both devices on the same
> SPI bus? Do they share interrupt lines?
> 

It's on the dual device board, the macphys are using separate spi buses,
one chip shares the bus with another spi device, but the other is the
only tenant on the bus.

No device shares an irq line.

Pretty sure I can replicate the result for both devices, but need to 
double check, been to much testing of random things for me to keep track.

I'll do some more digging, I think we're getting pretty close to
understading the behaviour now.

R
Andrew Lunn May 13, 2024, 2:04 p.m. UTC | #21
> Good input. I'll add some instrumentation/stats for how many jiffies
> have elapsed between releases of the worker thread and for the irq
> handler. I can probably find a gpio to toggle as well if it's really
> tight timings.

What might be more interesting is the interrupt status registers. Is
there a bit always set which the driver is not clearly correctly?

You can try printing the values. But that might upset the timing so
you cannot reproduce the issue.

If the printk() does upset the timing, what i have done before is
allocate an array of u32 values. Write the interrupt status into it,
looping around when you get to the end of the array. And then use
debugfs_create_u32_array() to export the array in /sys/kernel/debugfs.
Trigger the problem and then look at the values.

> > Is this your dual device board? Do you have both devices on the same
> > SPI bus? Do they share interrupt lines?
> > 
> 
> It's on the dual device board, the macphys are using separate spi buses,
> one chip shares the bus with another spi device, but the other is the
> only tenant on the bus.
> 
> No device shares an irq line.

I was just wondering how your setup differs so you can trigger the
issue, but others have not been able to reproduce it. It might be
another clue as to what is going on. I don't think you need to do
anything with respect to this, its just information to keep in mind.

	Andrew
Parthiban Veerasooran May 14, 2024, 4:46 a.m. UTC | #22
Hi Ramon,

On 13/05/24 7:20 pm, Ramón Nordin Rodriguez wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> On Mon, May 13, 2024 at 03:00:48PM +0200, Andrew Lunn wrote:
>>> I've enabled some debugging options but so far nothing seems to hit.
>>> What I've been able to conclude is that there still is SPI
>>> communication, the macphy interrupt is still pulled low, and the cpu
>>> does the ack so that it's reset to inactive.
>>
>> Is it doing this in an endless cycle?
> 
> Exactly, so what I'm seeing is when the driver livelocks the macphy is
> periodically pulling the irq pin low, the driver clears the interrupt
> and repeat.
If I understand correctly, you are keep on getting interrupt without 
indicating anything in the footer?. Are you using LAN8650 Rev.B0 or B1?. 
If it is B0 then can you try with Rev.B1 once?

Best regards,
Parthiban V
> 
>>
>> Probably the debug tools are not showing anything because it is not
>> looping in just one location. It is a complex loop, interrupts
>> triggering a thread which runs to completion etc. So it looks like
>> normal behaviour.
> 
> Gotcha. The 'do work' func called in the worker threads loop does run
> and return, so I guess there is not much to trigger on.
> 
>>
>> If it is an endless cycle, it sounds like an interrupt storm. Some
>> interrupt bit is not getting cleared, so it immediately fires again as
>> soon as interrupts are enabled.
> 
> Good input. I'll add some instrumentation/stats for how many jiffies
> have elapsed between releases of the worker thread and for the irq
> handler. I can probably find a gpio to toggle as well if it's really
> tight timings.
> 
> The irq pin is inactive/high for 100s of us to ms in the measurments
> I've done. But I've been using multiple channels and not the fanciest
> equipment so samplerates might be playing tricks, I'll rerun some tests
> while only measuring the irq pin.
> 
>>
>> Is this your dual device board? Do you have both devices on the same
>> SPI bus? Do they share interrupt lines?
>>
> 
> It's on the dual device board, the macphys are using separate spi buses,
> one chip shares the bus with another spi device, but the other is the
> only tenant on the bus.
> 
> No device shares an irq line.
> 
> Pretty sure I can replicate the result for both devices, but need to
> double check, been to much testing of random things for me to keep track.
> 
> I'll do some more digging, I think we're getting pretty close to
> understading the behaviour now.
> 
> R
Ramón Nordin Rodriguez May 15, 2024, 9:45 p.m. UTC | #23
> > Good input. I'll add some instrumentation/stats for how many jiffies
> > have elapsed between releases of the worker thread and for the irq
> > handler. I can probably find a gpio to toggle as well if it's really
> > tight timings.
> 
> What might be more interesting is the interrupt status registers. Is
> there a bit always set which the driver is not clearly correctly?
> 

Ah great point, I'll dump the irq status registers when things go awry.

> You can try printing the values. But that might upset the timing so
> you cannot reproduce the issue.
> 
> If the printk() does upset the timing, what i have done before is
> allocate an array of u32 values. Write the interrupt status into it,
> looping around when you get to the end of the array. And then use
> debugfs_create_u32_array() to export the array in /sys/kernel/debugfs.
> Trigger the problem and then look at the values.

Good tip, sounds like the exact strategy I need. Appreciate the hands on
suggestions!

> 
> > > Is this your dual device board? Do you have both devices on the same
> > > SPI bus? Do they share interrupt lines?
> > > 
> > 
> > It's on the dual device board, the macphys are using separate spi buses,
> > one chip shares the bus with another spi device, but the other is the
> > only tenant on the bus.
> > 
> > No device shares an irq line.
> 
> I was just wondering how your setup differs so you can trigger the
> issue, but others have not been able to reproduce it. It might be
> another clue as to what is going on. I don't think you need to do
> anything with respect to this, its just information to keep in mind.
> 

My typical setup is weird for sure, I skip the initramfs and load a
kernel and dtb into ram (which means I don't have DMA, due to missing fw),
but I'm in the habit of occasionally flashing and running a 'normal system'
every once in a while for verifcation.

I can't think of anything on top of my head that would set my system in
a unique position. But since I don't get the failure when I have a UDP
multicast running I'm guessing different networking daemons might affect
things, in this case we're running systemd-networkd and I manually set
an ipv4 address on the interfaces.

R
Ramón Nordin Rodriguez May 15, 2024, 9:48 p.m. UTC | #24
On Tue, May 14, 2024 at 04:46:58AM +0000, Parthiban.Veerasooran@microchip.com wrote:
> >> Is it doing this in an endless cycle?
> > 
> > Exactly, so what I'm seeing is when the driver livelocks the macphy is
> > periodically pulling the irq pin low, the driver clears the interrupt
> > and repeat.
> If I understand correctly, you are keep on getting interrupt without 
> indicating anything in the footer?. Are you using LAN8650 Rev.B0 or B1?. 
> If it is B0 then can you try with Rev.B1 once?
> 

I'll check the footer content, thanks for the tip!

All testing has bee done with Rev.B0, we've located a set of B1 chips.
So we'll get on resoldering and rerunning the test scenario.

R
Parthiban Veerasooran May 17, 2024, 9:38 a.m. UTC | #25
Hi Ramon,

On 16/05/24 3:18 am, Ramón Nordin Rodriguez wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> On Tue, May 14, 2024 at 04:46:58AM +0000, Parthiban.Veerasooran@microchip.com wrote:
>>>> Is it doing this in an endless cycle?
>>>
>>> Exactly, so what I'm seeing is when the driver livelocks the macphy is
>>> periodically pulling the irq pin low, the driver clears the interrupt
>>> and repeat.
>> If I understand correctly, you are keep on getting interrupt without
>> indicating anything in the footer?. Are you using LAN8650 Rev.B0 or B1?.
>> If it is B0 then can you try with Rev.B1 once?
>>
> 
> I'll check the footer content, thanks for the tip!
> 
> All testing has bee done with Rev.B0, we've located a set of B1 chips.
> So we'll get on resoldering and rerunning the test scenario.
Thanks for the consideration. But be informed that the internal PHY 
initial settings are updated for the Rev.B1. But the one from the 
mainline still supports for Rev.B0. So that microchip_t1s.c to be 
updated to support Rev.B1.

Also I am in talk with our design team that whether the updated initial 
settings for B1 are also applicable for B0. If so, then we will have 
only one updated initial setting which supports both B0 and B1.

Do you have any plan to update the microchip_t1s.c for Rev.B1 support OR 
do you want me to do it? If you want me to do it then I will prepare a 
separate patch series for the support?

Best regards,
Parthiban V
> 
> R
>
Ramón Nordin Rodriguez May 17, 2024, 12:43 p.m. UTC | #26
> Thanks for the consideration. But be informed that the internal PHY 
> initial settings are updated for the Rev.B1. But the one from the 
> mainline still supports for Rev.B0. So that microchip_t1s.c to be 
> updated to support Rev.B1.

So I figured, thanks for making it clear.

> 
> Also I am in talk with our design team that whether the updated initial 
> settings for B1 are also applicable for B0. If so, then we will have 
> only one updated initial setting which supports both B0 and B1.

Sounds ideal if one init procedure can cover both chip revs.

> 
> Do you have any plan to update the microchip_t1s.c for Rev.B1 support OR 
> do you want me to do it? If you want me to do it then I will prepare a 
> separate patch series for the support?

I'm keen on taking a Rev.B1 chip for a spin, so I'll jump on doing doing
the init/fixup. I can probably post it to the mailing list during the
weekend.
If you're planning on being faster than that I'll wait for your
submission.

Out of curiosity do you have any insight into if the rev b1 is expected
to behave differently?

R
Ramón Nordin Rodriguez May 24, 2024, 6:12 p.m. UTC | #27
> >>>> Is it doing this in an endless cycle?
> >>>
> >>> Exactly, so what I'm seeing is when the driver livelocks the macphy is
> >>> periodically pulling the irq pin low, the driver clears the interrupt
> >>> and repeat.
> >> If I understand correctly, you are keep on getting interrupt without
> >> indicating anything in the footer?. Are you using LAN8650 Rev.B0 or B1?.
> >> If it is B0 then can you try with Rev.B1 once?
> >>

After a considerable ammount of headscratching it seems that disabling collision
detection on the macphy is the only way of getting it stable.
When PLCA is enabled it's expected that CD causes problems, when running
in CSMA/CD mode it was unexpected (for me at least).

Disabling collision detection was discussed here 
https://lore.kernel.org/netdev/20231127104045.96722-1-ramon.nordin.rodriguez@ferroamp.se/
in a patchset that I haven't gotten around to testing through properly
and fixing up, but now it's definetly a priority.

Rev.b0 and b1 gives similar results in this domain, though I'm getting
lower throughput and it's easier/faster to get the internal error state
on rev.b1.

When CD is disabled both chip revs seems stable in all of my testing.

> > 
> > I'll check the footer content, thanks for the tip!
> > 
> > All testing has bee done with Rev.B0, we've located a set of B1 chips.
> > So we'll get on resoldering and rerunning the test scenario.
> Thanks for the consideration. But be informed that the internal PHY 
> initial settings are updated for the Rev.B1. But the one from the 
> mainline still supports for Rev.B0. So that microchip_t1s.c to be 
> updated to support Rev.B1.

I posted a suggestion for how to bringup rev.b1
https://lore.kernel.org/netdev/20240524140706.359537-1-ramon.nordin.rodriguez@ferroamp.se/

I should have prefaced the cover letter with 'ugly hacks ahead'.

> 
> Also I am in talk with our design team that whether the updated initial 
> settings for B1 are also applicable for B0. If so, then we will have 
> only one updated initial setting which supports both B0 and B1.

Any update on this?

I will submit a new revision of the lan8670 revc + disable collision
detection pathset where CD is disabled regardless of operating mode.

R
Andrew Lunn May 24, 2024, 6:31 p.m. UTC | #28
> After a considerable ammount of headscratching it seems that disabling collision
> detection on the macphy is the only way of getting it stable.
> When PLCA is enabled it's expected that CD causes problems, when running
> in CSMA/CD mode it was unexpected (for me at least).

Now we are back to, why is your system different? What is triggering a
collision for you, but not Parthiban?

There is nothing in the standard about reporting a collision. So this
is a Microchip extension? So the framework is not doing anything when
it happens, which will explain why it becomes a storm.... Until we do
have a mechanism to handle vendor specific interrupts, the frame work
should disable them all, to avoid this storm.

Does the datasheet document what to do on a collision? How are you
supposed to clear the condition?

       Andrew
Piergiorgio Beruto May 24, 2024, 6:49 p.m. UTC | #29
Hi all,
Just my two cents here...

Collision detection is a fundamental building block of the CSMA/CD mechanism.
The PHY detects physical collisions and reports them to the MAC via the COL pin.
The MAC is supposed to perform the normal back-off operation: stop the current transmission and re-transmit at a later time following the exponentially increasing random algorithm (See IEEE 802.3 Clause 4).
** Therefore, collision detect shall NOT be brought up to the user. In a MACPHY system it is supposed to be handled entirely by the PHY. **

With the introduction of PLCA, the PHY may report also "logical collisions" to the MAC.  These are not real collisions as they don't happen on the line. They are part of the normal PLCA back-pressuring mechanism that allows the PHY to send a frame only during a specific transmit opportunity. So once more, this kind of collision shall not be reported to the user, it is just normal behavior.

However, physical collisions (i.e., collisions that really happens on the line) are NOT supposed to happen when PLCA is enabled and configured correctly.
For this reason we have a standard IEEE register (PCS Diagnostic 2) which still captures physical collisions (not logical ones). This register is supposed to be used as a diagnostic information to let the user know that something is misconfigured.
Now, some PHYs can be configured to provide IRQs when this kind of collision happens, indicating a configuration problem.

Additionally, many PHYs allows the user to completely switch off the PHY physical collision detection when PLCA is enabled. This is also recommended by the OPEN Alliance specifications, although no standard register was defined to achieve that (maybe we should add it to the standard...).

I'm not sure I followed all this discussion, I just hope this clarification might help in finding a good solution.
In short:

- Collisions are NOT to be reported to Linux. The MAC/PHY shall handle those internally.
- Physical collisions are still counted into a diagnostic register (could be good to add it to ethtool MAC statistics)
- Disabling collision detection is allowed only when PLCA is enabled, but it is not a standard feature, although automotive specs recommends it.

Thanks,
Piergiorgio

-----Original Message-----
From: Andrew Lunn <andrew@lunn.ch> 
Sent: 24 May, 2024 20:32
To: Ramón Nordin Rodriguez <ramon.nordin.rodriguez@ferroamp.se>
Cc: Parthiban.Veerasooran@microchip.com; Piergiorgio Beruto <Pier.Beruto@onsemi.com>; davem@davemloft.net; edumazet@google.com; kuba@kernel.org; pabeni@redhat.com; horms@kernel.org; saeedm@nvidia.com; anthony.l.nguyen@intel.com; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; corbet@lwn.net; linux-doc@vger.kernel.org; robh+dt@kernel.org; krzysztof.kozlowski+dt@linaro.org; conor+dt@kernel.org; devicetree@vger.kernel.org; Horatiu.Vultur@microchip.com; ruanjinjie@huawei.com; Steen.Hegelund@microchip.com; vladimir.oltean@nxp.com; UNGLinuxDriver@microchip.com; Thorsten.Kummermehr@microchip.com; Selvamani Rajagopal <Selvamani.Rajagopal@onsemi.com>; Nicolas.Ferre@microchip.com; benjamin.bigler@bernformulastudent.ch
Subject: Re: [PATCH net-next v4 05/12] net: ethernet: oa_tc6: implement error interrupts unmasking

[External Email]: This email arrived from an external source - Please exercise caution when opening any attachments or clicking on links.

> After a considerable ammount of headscratching it seems that disabling 
> collision detection on the macphy is the only way of getting it stable.
> When PLCA is enabled it's expected that CD causes problems, when 
> running in CSMA/CD mode it was unexpected (for me at least).

Now we are back to, why is your system different? What is triggering a collision for you, but not Parthiban?

There is nothing in the standard about reporting a collision. So this is a Microchip extension? So the framework is not doing anything when it happens, which will explain why it becomes a storm.... Until we do have a mechanism to handle vendor specific interrupts, the frame work should disable them all, to avoid this storm.

Does the datasheet document what to do on a collision? How are you supposed to clear the condition?

       Andrew
Parthiban Veerasooran May 27, 2024, 8:38 a.m. UTC | #30
Hi Ramon,

On 24/05/24 11:42 pm, Ramón Nordin Rodriguez wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
>>>>>> Is it doing this in an endless cycle?
>>>>>
>>>>> Exactly, so what I'm seeing is when the driver livelocks the macphy is
>>>>> periodically pulling the irq pin low, the driver clears the interrupt
>>>>> and repeat.
>>>> If I understand correctly, you are keep on getting interrupt without
>>>> indicating anything in the footer?. Are you using LAN8650 Rev.B0 or B1?.
>>>> If it is B0 then can you try with Rev.B1 once?
>>>>
> 
> After a considerable ammount of headscratching it seems that disabling collision
> detection on the macphy is the only way of getting it stable.
> When PLCA is enabled it's expected that CD causes problems, when running
> in CSMA/CD mode it was unexpected (for me at least).
Thanks for the feedback.
> 
> Disabling collision detection was discussed here
> https://lore.kernel.org/netdev/20231127104045.96722-1-ramon.nordin.rodriguez@ferroamp.se/
As you started this thread long back, I thought that those patches are 
already in but now I understand that they are not. In all my testings I 
have my CD disable fix in my PHY driver.
> in a patchset that I haven't gotten around to testing through properly
> and fixing up, but now it's definetly a priority.
> 
> Rev.b0 and b1 gives similar results in this domain, though I'm getting
> lower throughput and it's easier/faster to get the internal error state
> on rev.b1.
> 
> When CD is disabled both chip revs seems stable in all of my testing.
If I understand correctly, disabling CD when PLCA enabled works as 
expected in both B0 and B1? correct me if I am wrong. If that is the 
case, then I would recommend to concentrate on the below patch set to 
get in the mainline first instead of focusing on B1 patches which can be 
done after that.
https://lore.kernel.org/netdev/20231127104045.96722-1-ramon.nordin.rodriguez@ferroamp.se/
> 
>>>
>>> I'll check the footer content, thanks for the tip!
>>>
>>> All testing has bee done with Rev.B0, we've located a set of B1 chips.
>>> So we'll get on resoldering and rerunning the test scenario.
>> Thanks for the consideration. But be informed that the internal PHY
>> initial settings are updated for the Rev.B1. But the one from the
>> mainline still supports for Rev.B0. So that microchip_t1s.c to be
>> updated to support Rev.B1.
> 
> I posted a suggestion for how to bringup rev.b1
> https://lore.kernel.org/netdev/20240524140706.359537-1-ramon.nordin.rodriguez@ferroamp.se/
> 
> I should have prefaced the cover letter with 'ugly hacks ahead'.
> 
>>
>> Also I am in talk with our design team that whether the updated initial
>> settings for B1 are also applicable for B0. If so, then we will have
>> only one updated initial setting which supports both B0 and B1.
> 
> Any update on this?
I think, I have answered for this in another mail.
> 
> I will submit a new revision of the lan8670 revc + disable collision
> detection pathset where CD is disabled regardless of operating mode.
Yes, I would recommend to do it.

Best regards,
Parthiban V
> 
> R
>
Parthiban Veerasooran May 27, 2024, 9:30 a.m. UTC | #31
Hi Andrew,

On 25/05/24 12:01 am, Andrew Lunn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
>> After a considerable ammount of headscratching it seems that disabling collision
>> detection on the macphy is the only way of getting it stable.
>> When PLCA is enabled it's expected that CD causes problems, when running
>> in CSMA/CD mode it was unexpected (for me at least).
> 
> Now we are back to, why is your system different? What is triggering a
> collision for you, but not Parthiban?
I am using PHY driver which has "Disable CD if PLCA is enabled" fix. 
Probably that could be the reason that why I am not running into these 
issues.
> 
> There is nothing in the standard about reporting a collision. So this
> is a Microchip extension? So the framework is not doing anything when
> it happens, which will explain why it becomes a storm.... Until we do
> have a mechanism to handle vendor specific interrupts, the frame work
> should disable them all, to avoid this storm.
"IEEE 10BASE-T1S Implementation Specification" from OPEN Alliance does 
specify this in the section "5.2 Collision Detection (CD) / Handling" 
for the Automotive environment support.

https://opensig.org/wp-content/uploads/2023/12/20230215_10BASE-T1S_system_implementation_V1_0.pdf

The automotive EMC immunity requirements exceeds the alien crosstalk 
noise levels defined in IEEE 802.3cgTM-2019 [1]. Therefore, in such 
environment the CD mechanism of the PHY may not be able to distinguish 
noise from collisions, limiting the achievable level of immunity.
> 
> Does the datasheet document what to do on a collision? How are you
> supposed to clear the condition?
"8.5 PLCA Collision Detection" section in the LAN8650/1 datasheet 
describes the importance of disabling collision detection in case of 
PLCA mode enabled.

When nodes in a mixing segment are properly configured for PLCA 
operation there will be no physical collisions. However, under certain 
conditions, including mixing segments with significant inherent noise 
due to reflections, and systems under high electromagnetic stress, false 
collisions may be detected. The false detection of late collisions will 
result in the transmitting node dropping the packet. As packets are 
typically received correctly in these conditions, it is recommended to 
disable collision detection at any time that PLCA is enabled and active. 
Collision detection is disabled by writing a zero to the Collision 
Detect Enable (CDEN) bit in the Collision Detector Control 0 (CDCTL0) 
register.

https://ww1.microchip.com/downloads/aemDocuments/documents/AIS/ProductDocuments/DataSheets/LAN8650-1-Data-Sheet-60001734.pdf

Hope this clarifies.

Best regards,
Parthiban V
> 
>         Andrew
diff mbox series

Patch

diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c
index 4b0f63c02c35..850765574d30 100644
--- a/drivers/net/ethernet/oa_tc6.c
+++ b/drivers/net/ethernet/oa_tc6.c
@@ -18,6 +18,13 @@ 
 #define OA_TC6_REG_STATUS0			0x0008
 #define STATUS0_RESETC				BIT(6)	/* Reset Complete */
 
+/* Interrupt Mask Register #0 */
+#define OA_TC6_REG_INT_MASK0			0x000C
+#define INT_MASK0_HEADER_ERR_MASK		BIT(5)
+#define INT_MASK0_LOSS_OF_FRAME_ERR_MASK	BIT(4)
+#define INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK	BIT(3)
+#define INT_MASK0_TX_PROTOCOL_ERR_MASK		BIT(0)
+
 /* Control command header */
 #define OA_TC6_CTRL_HEADER_DATA_NOT_CTRL	BIT(31)
 #define OA_TC6_CTRL_HEADER_WRITE_NOT_READ	BIT(29)
@@ -324,6 +331,23 @@  static int oa_tc6_sw_reset_macphy(struct oa_tc6 *tc6)
 	return oa_tc6_write_register(tc6, OA_TC6_REG_STATUS0, regval);
 }
 
+static int oa_tc6_unmask_macphy_error_interrupts(struct oa_tc6 *tc6)
+{
+	u32 regval;
+	int ret;
+
+	ret = oa_tc6_read_register(tc6, OA_TC6_REG_INT_MASK0, &regval);
+	if (ret)
+		return ret;
+
+	regval &= ~(INT_MASK0_TX_PROTOCOL_ERR_MASK |
+		    INT_MASK0_RX_BUFFER_OVERFLOW_ERR_MASK |
+		    INT_MASK0_LOSS_OF_FRAME_ERR_MASK |
+		    INT_MASK0_HEADER_ERR_MASK);
+
+	return oa_tc6_write_register(tc6, OA_TC6_REG_INT_MASK0, regval);
+}
+
 /**
  * oa_tc6_init - allocates and initializes oa_tc6 structure.
  * @spi: device with which data will be exchanged.
@@ -364,6 +388,13 @@  struct oa_tc6 *oa_tc6_init(struct spi_device *spi)
 		return NULL;
 	}
 
+	ret = oa_tc6_unmask_macphy_error_interrupts(tc6);
+	if (ret) {
+		dev_err(&tc6->spi->dev,
+			"MAC-PHY error interrupts unmask failed: %d\n", ret);
+		return NULL;
+	}
+
 	return tc6;
 }
 EXPORT_SYMBOL_GPL(oa_tc6_init);