diff mbox series

[for-next,02/10] IB/{ipoib,hfi1}: Add a timeout handler for rdma_netdev

Message ID 1617026056-50483-3-git-send-email-dennis.dalessandro@cornelisnetworks.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series Fixes for 5.13 | expand

Commit Message

Dennis Dalessandro March 29, 2021, 1:54 p.m. UTC
From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>

The current rdma_netdev handling in ipoib hooks the
tx_timeout handler, but prints out a totally useless
message that prevents effective debugging especially when
multiple transmit queues are being used.

Add a tx_timeout rdma_netdev hook and implement the
callback in the hfi1 to print additional information.

The existing non-helpful message is avoided when the driver
has presented a callback.

Reviewed-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
---
 drivers/infiniband/hw/hfi1/ipoib.h        |  2 ++
 drivers/infiniband/hw/hfi1/ipoib_main.c   |  1 +
 drivers/infiniband/hw/hfi1/ipoib_tx.c     | 29 +++++++++++++++++++++++++++++
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  5 +++++
 include/rdma/ib_verbs.h                   |  2 ++
 5 files changed, 39 insertions(+)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
index f650cac..1659beb 100644
--- a/drivers/infiniband/hw/hfi1/ipoib.h
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -147,4 +147,6 @@  int hfi1_ipoib_rn_get_params(struct ib_device *device,
 			     enum rdma_netdev_t type,
 			     struct rdma_netdev_alloc_params *params);
 
+void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q);
+
 #endif /* _IPOIB_H */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
index 3242290..b8838fa 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -204,6 +204,7 @@  static int hfi1_ipoib_setup_rn(struct ib_device *device,
 	int rc;
 
 	rn->send = hfi1_ipoib_send;
+	rn->tx_timeout = hfi1_ipoib_tx_timeout;
 	rn->attach_mcast = hfi1_ipoib_mcast_attach;
 	rn->detach_mcast = hfi1_ipoib_mcast_detach;
 	rn->set_id = hfi1_ipoib_set_id;
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index 5129dc9..e8dece5 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -866,3 +866,32 @@  void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
 		(void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
 	}
 }
+
+void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	struct hfi1_ipoib_txq *txq = &priv->txqs[q];
+	u64 completed = atomic64_read(&txq->complete_txreqs);
+
+	dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n",
+		    (unsigned long long)txq, q,
+		    __netif_subqueue_stopped(dev, txq->q_idx),
+		    atomic_read(&txq->stops),
+		    atomic_read(&txq->no_desc),
+		    atomic_read(&txq->ring_full));
+	dd_dev_info(priv->dd, "sde %llx engine %u\n",
+		    (unsigned long long)txq->sde,
+		    txq->sde ? txq->sde->this_idx : 0);
+	dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
+	dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
+		    txq->sent_txreqs, completed, hfi1_ipoib_used(txq));
+	dd_dev_info(priv->dd, "tx_queue_len %u max_items %lu\n",
+		    dev->tx_queue_len, txq->tx_ring.max_items);
+	dd_dev_info(priv->dd, "head %lu tail %lu\n",
+		    txq->tx_ring.head, txq->tx_ring.tail);
+	dd_dev_info(priv->dd, "wait queued %u\n",
+		    !list_empty(&txq->wait.list));
+	dd_dev_info(priv->dd, "tx_list empty %u\n",
+		    list_empty(&txq->tx_list));
+}
+
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e16b40c..2fb2fa16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1181,7 +1181,12 @@  static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
 
+	if (rn->tx_timeout) {
+		rn->tx_timeout(dev, txqueue);
+		return;
+	}
 	ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
 		   jiffies_to_msecs(jiffies - dev_trans_start(dev)));
 	ipoib_warn(priv,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 21c19b1..84f7084 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2215,6 +2215,8 @@  struct rdma_netdev {
 			    int set_qkey, u32 qkey);
 	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
 			    union ib_gid *gid, u16 mlid);
+	/* timeout */
+	void (*tx_timeout)(struct net_device *dev, unsigned int txqueue);
 };
 
 struct rdma_netdev_alloc_params {