@@ -477,6 +477,7 @@ struct lnet_ni *
bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
struct lnet_net *lnet_get_net_locked(u32 net_id);
+extern unsigned int lnet_transaction_timeout;
extern unsigned int lnet_numa_range;
extern unsigned int lnet_peer_discovery_disabled;
extern int portal_rotor;
@@ -92,6 +92,13 @@ struct lnet the_lnet = {
MODULE_PARM_DESC(lnet_peer_discovery_disabled,
"Set to 1 to disable peer discovery on this node.");
+unsigned int lnet_transaction_timeout = 5;	/* in seconds */
+static int transaction_to_set(const char *val, const struct kernel_param *kp);
+module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_uint,
+		  &lnet_transaction_timeout, 0444); /* uint getter: var is unsigned */
+MODULE_PARM_DESC(lnet_transaction_timeout,
+		 "Time in seconds to wait for a REPLY or an ACK");
+
/*
* This sequence number keeps track of how many times DLC was used to
* update the local NIs. It is incremented when a NI is added or
@@ -158,6 +165,43 @@ static int lnet_discover(struct lnet_process_id id, u32 force,
}
static int
+transaction_to_set(const char *val, const struct kernel_param *kp)
+{
+	/* Setter for the 'lnet_transaction_timeout' module parameter: parse
+	 * val and store it through kp->arg.  Returns 0 on success, the
+	 * kstrtouint() error if val is not a valid unsigned int, or -EINVAL
+	 * if the parsed value is zero.
+	 */
+	unsigned int *transaction_to = (unsigned int *)kp->arg;
+	unsigned int value;
+	int rc;
+
+	/* Parse straight into an unsigned int: going through an unsigned
+	 * long would let a value above UINT_MAX pass the zero check below
+	 * and then be silently truncated (e.g. 2^32 -> 0) by the store.
+	 */
+	rc = kstrtouint(val, 0, &value);
+	if (rc) {
+		CERROR("Invalid module parameter value for 'lnet_transaction_timeout'\n");
+		return rc;
+	}
+
+	/* Zero is rejected before taking the lock; nothing shared is read. */
+	if (value == 0) {
+		CERROR("Invalid value for lnet_transaction_timeout (%u).\n",
+		       value);
+		return -EINVAL;
+	}
+
+	/* Serialize the store against other ln_api_mutex holders. */
+	mutex_lock(&the_lnet.ln_api_mutex);
+	*transaction_to = value;
+	mutex_unlock(&the_lnet.ln_api_mutex);
+
+	return 0;
+}
+
+static int
intf_max_set(const char *val, const struct kernel_param *kp)
{
int value, rc;
@@ -2942,7 +2942,7 @@ static int lnet_peer_rediscover(struct lnet_peer *lp)
* obsessively re-check the clock. The oldest discovery request will
* be at the head of the queue.
*/
-static struct lnet_peer *lnet_peer_dc_timed_out(time64_t now)
+static struct lnet_peer *lnet_peer_get_dc_timed_out(time64_t now)
{
struct lnet_peer *lp;
@@ -2950,7 +2950,7 @@ static struct lnet_peer *lnet_peer_dc_timed_out(time64_t now)
return NULL;
lp = list_first_entry(&the_lnet.ln_dc_working,
struct lnet_peer, lp_dc_list);
- if (now < lp->lp_last_queued + DEFAULT_PEER_TIMEOUT)
+ if (now < lp->lp_last_queued + lnet_transaction_timeout)
return NULL;
return lp;
}
@@ -2961,7 +2961,7 @@ static struct lnet_peer *lnet_peer_dc_timed_out(time64_t now)
* lnet_discovery_event_handler() will proceed from here and complete
* the cleanup.
*/
-static void lnet_peer_discovery_timeout(struct lnet_peer *lp)
+static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
{
struct lnet_handle_md ping_mdh;
struct lnet_handle_md push_mdh;
@@ -3010,7 +3010,7 @@ static int lnet_peer_discovery_wait_for_work(void)
break;
if (!list_empty(&the_lnet.ln_msg_resend))
break;
- if (lnet_peer_dc_timed_out(ktime_get_real_seconds()))
+ if (lnet_peer_get_dc_timed_out(ktime_get_real_seconds()))
break;
lnet_net_unlock(cpt);
@@ -3177,14 +3177,14 @@ static int lnet_peer_discovery(void *arg)
* taking too long. Move all that are found to the
* ln_dc_expired queue and time out any pending
* Ping or Push. We have to drop the lnet_net_lock
- * in the loop because lnet_peer_discovery_timeout()
+ * in the loop because lnet_peer_cancel_discovery()
* calls LNetMDUnlink().
*/
now = ktime_get_real_seconds();
- while ((lp = lnet_peer_dc_timed_out(now)) != NULL) {
+ while ((lp = lnet_peer_get_dc_timed_out(now)) != NULL) {
list_move(&lp->lp_dc_list, &the_lnet.ln_dc_expired);
lnet_net_unlock(LNET_LOCK_EX);
- lnet_peer_discovery_timeout(lp);
+ lnet_peer_cancel_discovery(lp);
lnet_net_lock(LNET_LOCK_EX);
}
@@ -3208,7 +3208,7 @@ static int lnet_peer_discovery(void *arg)
struct lnet_peer, lp_dc_list);
list_move(&lp->lp_dc_list, &the_lnet.ln_dc_expired);
lnet_net_unlock(LNET_LOCK_EX);
- lnet_peer_discovery_timeout(lp);
+ lnet_peer_cancel_discovery(lp);
lnet_net_lock(LNET_LOCK_EX);
}
lnet_net_unlock(LNET_LOCK_EX);