diff mbox series

[net-next,12/33] can: sja1000: Prevent overrun stalls with a soft reset on Renesas SoCs

Message ID 20230622082658.571150-13-mkl@pengutronix.de (mailing list archive)
State Accepted
Commit 717c6ec241b5524400acc0f6009b89e2c59527f9
Delegated to: Netdev Maintainers
Headers show
Series [net-next,01/33] can: kvaser_usb: Add len8_dlc support | expand

Checks

Context Check Description
netdev/series_format warning Pull request is its own cover letter; Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 6 maintainers not CCed: cui.jinpeng2@zte.com.cn biju.das.jz@bp.renesas.com wg@grandegger.com pabeni@redhat.com edumazet@google.com mailhol.vincent@wanadoo.fr
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Marc Kleine-Budde June 22, 2023, 8:26 a.m. UTC
From: Miquel Raynal <miquel.raynal@bootlin.com>

In their RZN1 SoC, Renesas put a CAN controller supposed to act very
similarly to the original Philips sja1000. In practice, while flooding
the bus with another device, we discovered that the controller very
often after an overrun situation would just refuse any new frame, drop
them all and trigger over and over again the overrun interrupt, even
though the buffer would have been totally emptied. The controller acts
as if its internal buffer offsets (where it writes and where the host
reads) were totally screwed up.

The Renesas manual mentions a single action to perform in order to
resynchronize the read and write offsets within the buffer: performing
a soft reset.

Performing a soft reset takes a bit of time and involves small delays,
so it is better to do that in a threaded handler rather than inside the
hard IRQ handler.

Add platform data to recognize the platforms which need this workaround,
and when the faulty situation is diagnosed, stop what is being
performed and request the threaded handler to be executed in order to
perform the reset.

Tested-by: Jérémie Dautheribes  <jeremie.dautheribes@bootlin.com> # 5.10
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/all/20230616134553.2786391-2-miquel.raynal@bootlin.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/sja1000/sja1000.c          | 29 +++++++++++++++++++---
 drivers/net/can/sja1000/sja1000.h          |  1 +
 drivers/net/can/sja1000/sja1000_platform.c |  5 +++-
 3 files changed, 31 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 4719806e3a9f..0ada0e160e93 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -387,6 +387,16 @@  static void sja1000_rx(struct net_device *dev)
 	netif_rx(skb);
 }
 
+static irqreturn_t sja1000_reset_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+
+	netdev_dbg(dev, "performing a soft reset upon overrun\n");
+	sja1000_start(dev);
+
+	return IRQ_HANDLED;
+}
+
 static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
 {
 	struct sja1000_priv *priv = netdev_priv(dev);
@@ -397,6 +407,7 @@  static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
 	enum can_state rx_state, tx_state;
 	unsigned int rxerr, txerr;
 	uint8_t ecc, alc;
+	int ret = 0;
 
 	skb = alloc_can_err_skb(dev, &cf);
 	if (skb == NULL)
@@ -413,6 +424,15 @@  static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
 		stats->rx_over_errors++;
 		stats->rx_errors++;
 		sja1000_write_cmdreg(priv, CMD_CDO);	/* clear bit */
+
+		/* Some controllers need additional handling upon overrun
+		 * condition: the controller may sometimes be totally confused
+		 * and refuse any new frame while its buffer is empty. The only
+		 * way to re-sync the read vs. write buffer offsets is to
+		 * stop any current handling and perform a reset.
+		 */
+		if (priv->flags & SJA1000_QUIRK_RESET_ON_OVERRUN)
+			ret = IRQ_WAKE_THREAD;
 	}
 
 	if (isrc & IRQ_EI) {
@@ -492,7 +512,7 @@  static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
 
 	netif_rx(skb);
 
-	return 0;
+	return ret;
 }
 
 irqreturn_t sja1000_interrupt(int irq, void *dev_id)
@@ -548,6 +568,8 @@  irqreturn_t sja1000_interrupt(int irq, void *dev_id)
 		if (isrc & (IRQ_DOI | IRQ_EI | IRQ_BEI | IRQ_EPI | IRQ_ALI)) {
 			/* error interrupt */
 			err = sja1000_err(dev, isrc, status);
+			if (err == IRQ_WAKE_THREAD)
+				ret = err;
 			if (err)
 				break;
 		}
@@ -582,8 +604,9 @@  static int sja1000_open(struct net_device *dev)
 
 	/* register interrupt handler, if not done by the device driver */
 	if (!(priv->flags & SJA1000_CUSTOM_IRQ_HANDLER)) {
-		err = request_irq(dev->irq, sja1000_interrupt, priv->irq_flags,
-				  dev->name, (void *)dev);
+		err = request_threaded_irq(dev->irq, sja1000_interrupt,
+					   sja1000_reset_interrupt,
+					   priv->irq_flags, dev->name, (void *)dev);
 		if (err) {
 			close_candev(dev);
 			return -EAGAIN;
diff --git a/drivers/net/can/sja1000/sja1000.h b/drivers/net/can/sja1000/sja1000.h
index 7f736f1df547..f015e39e2224 100644
--- a/drivers/net/can/sja1000/sja1000.h
+++ b/drivers/net/can/sja1000/sja1000.h
@@ -147,6 +147,7 @@ 
  */
 #define SJA1000_CUSTOM_IRQ_HANDLER	BIT(0)
 #define SJA1000_QUIRK_NO_CDR_REG	BIT(1)
+#define SJA1000_QUIRK_RESET_ON_OVERRUN	BIT(2)
 
 /*
  * SJA1000 private data structure
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
index b4889b5746e5..4e59952c66d4 100644
--- a/drivers/net/can/sja1000/sja1000_platform.c
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -106,7 +106,7 @@  static void sp_technologic_init(struct sja1000_priv *priv, struct device_node *o
 
 static void sp_rzn1_init(struct sja1000_priv *priv, struct device_node *of)
 {
-	priv->flags = SJA1000_QUIRK_NO_CDR_REG;
+	priv->flags = SJA1000_QUIRK_NO_CDR_REG | SJA1000_QUIRK_RESET_ON_OVERRUN;
 }
 
 static void sp_populate(struct sja1000_priv *priv,
@@ -277,6 +277,9 @@  static int sp_probe(struct platform_device *pdev)
 		priv->irq_flags = IRQF_SHARED;
 	}
 
+	if (priv->flags & SJA1000_QUIRK_RESET_ON_OVERRUN)
+		priv->irq_flags |= IRQF_ONESHOT;
+
 	dev->irq = irq;
 	priv->reg_base = addr;