diff mbox series

virtio_net: introduce TX timeout watchdog

Message ID 20210917084004.44332-1-tonylu@linux.alibaba.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series virtio_net: introduce TX timeout watchdog | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 2 maintainers not CCed: kuba@kernel.org virtualization@lists.linux-foundation.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 64 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Tony Lu Sept. 17, 2021, 8:40 a.m. UTC
From: Tony Lu <tony.ly@linux.alibaba.com>

This implements the ndo_tx_timeout handler and adds a tx_timeouts counter
to the send queue stats. When something goes wrong while sending packets,
we can notice the tx timeout events and see the total timeout count.

We have suffered from send timeout issues caused by hung backends. With
this, we can find the details and collect the counters with monitoring
systems.

Signed-off-by: Tony Lu <tony.ly@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

Comments

Jason Wang Sept. 26, 2021, 4:36 a.m. UTC | #1
On Fri, Sep 17, 2021 at 4:45 PM tonylu_linux <tonylu@linux.alibaba.com> wrote:
>
> From: Tony Lu <tony.ly@linux.alibaba.com>
>
> This implements ndo_tx_timeout handler and put this into stats. When
> there is something wrong to send out packets, we could notice tx timeout
> events and total timeout counter.
>
> We have suffered send timeout issues due to the backends hung. With this,
> we can find the details, and collect the counters by monitor systems.
>
> Signed-off-by: Tony Lu <tony.ly@linux.alibaba.com>

Note that we support non tx interrupt mode (which could be turned on
via ethtool).

I wonder if this can work well in that case.

Or maybe it's time to remove the non tx interrupt mode completely.
Want to do that?

Thanks

> ---
>  drivers/net/virtio_net.c | 22 +++++++++++++++++++++-
>  1 file changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 271d38c1d9f8..90fed0fdc40f 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -80,6 +80,7 @@ struct virtnet_sq_stats {
>         u64 xdp_tx;
>         u64 xdp_tx_drops;
>         u64 kicks;
> +       u64 tx_timeouts;
>  };
>
>  struct virtnet_rq_stats {
> @@ -103,6 +104,7 @@ static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
>         { "xdp_tx",             VIRTNET_SQ_STAT(xdp_tx) },
>         { "xdp_tx_drops",       VIRTNET_SQ_STAT(xdp_tx_drops) },
>         { "kicks",              VIRTNET_SQ_STAT(kicks) },
> +       { "tx_timeouts",        VIRTNET_SQ_STAT(tx_timeouts) },
>  };
>
>  static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
> @@ -1856,7 +1858,7 @@ static void virtnet_stats(struct net_device *dev,
>         int i;
>
>         for (i = 0; i < vi->max_queue_pairs; i++) {
> -               u64 tpackets, tbytes, rpackets, rbytes, rdrops;
> +               u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
>                 struct receive_queue *rq = &vi->rq[i];
>                 struct send_queue *sq = &vi->sq[i];
>
> @@ -1864,6 +1866,7 @@ static void virtnet_stats(struct net_device *dev,
>                         start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
>                         tpackets = sq->stats.packets;
>                         tbytes   = sq->stats.bytes;
> +                       terrors  = sq->stats.tx_timeouts;
>                 } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
>
>                 do {
> @@ -1878,6 +1881,7 @@ static void virtnet_stats(struct net_device *dev,
>                 tot->rx_bytes   += rbytes;
>                 tot->tx_bytes   += tbytes;
>                 tot->rx_dropped += rdrops;
> +               tot->tx_errors  += terrors;
>         }
>
>         tot->tx_dropped = dev->stats.tx_dropped;
> @@ -2659,6 +2663,21 @@ static int virtnet_set_features(struct net_device *dev,
>         return 0;
>  }
>
> +static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
> +{
> +       struct virtnet_info *priv = netdev_priv(dev);
> +       struct send_queue *sq = &priv->sq[txqueue];
> +       struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
> +
> +       u64_stats_update_begin(&sq->stats.syncp);
> +       sq->stats.tx_timeouts++;
> +       u64_stats_update_end(&sq->stats.syncp);
> +
> +       netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
> +                  txqueue, sq->name, sq->vq->index, sq->vq->name,
> +                  jiffies_to_usecs(jiffies - txq->trans_start));
> +}
> +
>  static const struct net_device_ops virtnet_netdev = {
>         .ndo_open            = virtnet_open,
>         .ndo_stop            = virtnet_close,
> @@ -2674,6 +2693,7 @@ static const struct net_device_ops virtnet_netdev = {
>         .ndo_features_check     = passthru_features_check,
>         .ndo_get_phys_port_name = virtnet_get_phys_port_name,
>         .ndo_set_features       = virtnet_set_features,
> +       .ndo_tx_timeout         = virtnet_tx_timeout,
>  };
>
>  static void virtnet_config_changed_work(struct work_struct *work)
> --
> 2.19.1.6.gb485710b
>
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 271d38c1d9f8..90fed0fdc40f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -80,6 +80,7 @@  struct virtnet_sq_stats {
 	u64 xdp_tx;
 	u64 xdp_tx_drops;
 	u64 kicks;
+	u64 tx_timeouts;
 };
 
 struct virtnet_rq_stats {
@@ -103,6 +104,7 @@  static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
 	{ "xdp_tx",		VIRTNET_SQ_STAT(xdp_tx) },
 	{ "xdp_tx_drops",	VIRTNET_SQ_STAT(xdp_tx_drops) },
 	{ "kicks",		VIRTNET_SQ_STAT(kicks) },
+	{ "tx_timeouts",	VIRTNET_SQ_STAT(tx_timeouts) },
 };
 
 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
@@ -1856,7 +1858,7 @@  static void virtnet_stats(struct net_device *dev,
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
+		u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
 		struct receive_queue *rq = &vi->rq[i];
 		struct send_queue *sq = &vi->sq[i];
 
@@ -1864,6 +1866,7 @@  static void virtnet_stats(struct net_device *dev,
 			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
 			tpackets = sq->stats.packets;
 			tbytes   = sq->stats.bytes;
+			terrors  = sq->stats.tx_timeouts;
 		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
 
 		do {
@@ -1878,6 +1881,7 @@  static void virtnet_stats(struct net_device *dev,
 		tot->rx_bytes   += rbytes;
 		tot->tx_bytes   += tbytes;
 		tot->rx_dropped += rdrops;
+		tot->tx_errors  += terrors;
 	}
 
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -2659,6 +2663,21 @@  static int virtnet_set_features(struct net_device *dev,
 	return 0;
 }
 
+static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+	struct virtnet_info *priv = netdev_priv(dev);
+	struct send_queue *sq = &priv->sq[txqueue];
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
+
+	u64_stats_update_begin(&sq->stats.syncp);
+	sq->stats.tx_timeouts++;
+	u64_stats_update_end(&sq->stats.syncp);
+
+	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
+		   txqueue, sq->name, sq->vq->index, sq->vq->name,
+		   jiffies_to_usecs(jiffies - txq->trans_start));
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -2674,6 +2693,7 @@  static const struct net_device_ops virtnet_netdev = {
 	.ndo_features_check	= passthru_features_check,
 	.ndo_get_phys_port_name	= virtnet_get_phys_port_name,
 	.ndo_set_features	= virtnet_set_features,
+	.ndo_tx_timeout		= virtnet_tx_timeout,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)