diff mbox

Add exponential backoff + random delay to MADs when retrying after timeout.

Message ID 4C2744E8AD2982428C5BFE523DF8CDCB49D45E3CB6@MNEXMB1.qlogic.org (mailing list archive)
State New, archived
Headers show

Commit Message

Michael Heinz Oct. 11, 2010, 3:34 p.m. UTC
None
diff mbox

Patch

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef1304f..3b03f1c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -42,6 +42,11 @@ 
 #include "smi.h"
 #include "agent.h"
 
+#include <linux/random.h>
+
+#define MAD_MIN_TIMEOUT_MS 511	/* backoff base in ms; shifted left by the retry count */
+#define MAD_RAND_TIMEOUT_MS 511	/* mask on random32(): 0..511 ms subtracted from the delay */
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
@@ -55,6 +60,10 @@  MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
+static int mad_randomized_wait;	/* nonzero: every posted send uses randomized backoff */
+module_param_named(randomized_wait, mad_randomized_wait, int, 0444);
+MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff algorithm to control retries for timeouts.");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
@@ -1102,11 +1111,18 @@  int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		}
 
 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+
+		mad_send_wr->randomized_wait = mad_randomized_wait || send_buf->randomized_wait;
+		/* Jiffies budget shared by all retries: one nominal timeout per retry */
+		mad_send_wr->total_timeout = msecs_to_jiffies(send_buf->timeout_ms) * send_buf->retries;
+
 		/* Timeout will be updated after send completes */
 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+
 		mad_send_wr->max_retries = send_buf->retries;
 		mad_send_wr->retries_left = send_buf->retries;
+
 		send_buf->retries = 0;
 		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
@@ -1803,6 +1819,7 @@  static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
@@ -1811,6 +1828,7 @@  static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 			deref_mad_agent(mad_agent_priv);
 			return;
 		}
+
 		ib_mark_mad_done(mad_send_wr);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
@@ -2429,14 +2447,37 @@  static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	int ret;
 
-	if (!mad_send_wr->retries_left)
+	if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0))
 		return -ETIMEDOUT;
 
 	mad_send_wr->retries_left--;
 	mad_send_wr->send_buf.retries++;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	if (mad_send_wr->randomized_wait) {
+		/* Clamp the exponent so a large retry count cannot overflow the int shift */
+		int shift = min(mad_send_wr->send_buf.retries, 16);
+
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms +
+			(MAD_MIN_TIMEOUT_MS << shift) -
+			(random32() & MAD_RAND_TIMEOUT_MS));
+		/* Never wait longer than what is left of the overall retry budget */
+		if (mad_send_wr->timeout > mad_send_wr->total_timeout) {
+			mad_send_wr->timeout = mad_send_wr->total_timeout;
+			mad_send_wr->total_timeout = 0;
+		} else {
+			mad_send_wr->total_timeout -= mad_send_wr->timeout;
+		}
+	} else {
+		mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+	}
 
+	printk(KERN_DEBUG PFX "Retrying send %p: retries: %d, retries_left: %d, timeout: %lu, total_timeout: %lu\n",
+		mad_send_wr,
+		mad_send_wr->send_buf.retries,
+		mad_send_wr->retries_left,
+		mad_send_wr->timeout,
+		mad_send_wr->total_timeout);
+
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4..01fb7ed 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -132,8 +132,10 @@  struct ib_mad_send_wr_private {
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
 	unsigned long timeout;
+	unsigned long total_timeout;	/* jiffies still available to randomized retries */
 	int max_retries;
 	int retries_left;
+	int randomized_wait;		/* nonzero: retry_send() uses randomized backoff */
 	int retry;
 	int refcount;
 	enum ib_wc_status status;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index d3b9401..c3d6efb 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@ 
 
 #define IB_MGMT_MAX_METHODS			128
 
+/* MAD Status field values */
+#define IB_MGMT_MAD_STATUS_SUCCESS			0x0000
+#define IB_MGMT_MAD_STATUS_BUSY				0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD		0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERSION			0x0004
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD		0x0008
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB	0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE		0x001c
+
 /* RMPP information */
 #define IB_MGMT_RMPP_VERSION			1
 
@@ -246,6 +255,7 @@  struct ib_mad_send_buf {
 	int			seg_count;
 	int			seg_size;
 	int			timeout_ms;
+	int			randomized_wait;	/* nonzero: request randomized backoff for this send's retries */
 	int			retries;
 };