diff mbox

[v2,3/3] libceph: handle dead tcp connections during connection negotiation

Message ID 1390302667-8729-4-git-send-email-ilya.dryomov@inktank.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ilya Dryomov Jan. 21, 2014, 11:11 a.m. UTC
Keepalive mechanism that we are currently using doesn't handle dead
(e.g. half-open in RFC 793 sense) TCP connections: a) it's based on
pending ceph_osd_requests which are not necessarily present, and b)
keepalive byte is only sent if connection is in CON_STATE_OPEN state
because of protocol restrictions.  This does not cover connection
handshake and negotiation stages and can lead to kernel client hanging
forever.  Fix it by forcibly resetting the connection if after two
mount_timeouts we still haven't transitioned to CON_STATE_OPEN.

Fixes: http://tracker.ceph.com/issues/7139

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
---
 include/linux/ceph/messenger.h |    5 +++++
 net/ceph/ceph_common.c         |    1 +
 net/ceph/messenger.c           |   42 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 2 deletions(-)

Comments

Sage Weil Jan. 24, 2014, 11:07 p.m. UTC | #1
On Tue, 21 Jan 2014, Ilya Dryomov wrote:
> Keepalive mechanism that we are currently using doesn't handle dead
> (e.g. half-open in RFC 793 sense) TCP connections: a) it's based on
> pending ceph_osd_requests which are not necessarily present, and b)
> keepalive byte is only sent if connection is in CON_STATE_OPEN state
> because of protocol restrictions.  This does not cover connection
> handshake and negotiation stages and can lead to kernel client hanging
> forever.  Fix it by forcibly resetting the connection if after two
> mount_timeouts we still haven't transitioned to CON_STATE_OPEN.
> 
> Fixes: http://tracker.ceph.com/issues/7139
> 
> Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
> ---
>  include/linux/ceph/messenger.h |    5 +++++
>  net/ceph/ceph_common.c         |    1 +
>  net/ceph/messenger.c           |   42 ++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> index 20ee8b63a968..25d3ec8bcdde 100644
> --- a/include/linux/ceph/messenger.h
> +++ b/include/linux/ceph/messenger.h
> @@ -62,6 +62,8 @@ struct ceph_messenger {
>  
>  	u64 supported_features;
>  	u64 required_features;
> +
> +	unsigned long connect_timeout;	/* jiffies */
>  };
>  
>  enum ceph_msg_data_type {
> @@ -240,6 +242,8 @@ struct ceph_connection {
>  
>  	struct delayed_work work;	    /* send|recv work */
>  	unsigned long       delay;          /* current delay interval */
> +
> +	struct delayed_work connect_timeout_work;
>  };
>  
>  
> @@ -257,6 +261,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
>  			struct ceph_entity_addr *myaddr,
>  			u64 supported_features,
>  			u64 required_features,
> +			unsigned long connect_timeout,
>  			bool nocrc);
>  
>  extern void ceph_con_init(struct ceph_connection *con, void *private,
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index 67d7721d237e..bc41e5a392a4 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -511,6 +511,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
>  	ceph_messenger_init(&client->msgr, myaddr,
>  		client->supported_features,
>  		client->required_features,
> +		2 * client->options->mount_timeout * HZ,

Any specific reason for 2x?  I would like 1/2 if I had to pull a number 
out of a hat.  OTOH, mount_timeout could be 0, too, so it might make more 
sense to make this a separate mount option entirely.  Either that, or just 
make this a separate hard-coded value that is not related to 
mount_timeout.

>  		ceph_test_opt(client, NOCRC));
>  
>  	/* subsystems */
> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> index c7e0143d24f1..65dea6c8c6f8 100644
> --- a/net/ceph/messenger.c
> +++ b/net/ceph/messenger.c
> @@ -385,6 +385,16 @@ static void con_fault_raise(struct ceph_connection *con)
>  	queue_con(con);
>  }
>  
> +static void handle_connect_timeout(struct work_struct *work)
> +{
> +	struct ceph_connection *con = container_of(work,
> +						   struct ceph_connection,
> +						   connect_timeout_work.work);
> +
> +	dout("%s connect timeout\n", __func__);
> +	con_fault_raise(con);
> +}
> +
>  
>  /*
>   * socket callback functions
> @@ -447,6 +457,18 @@ static void ceph_sock_state_change(struct sock *sk)
>  	case TCP_ESTABLISHED:
>  		dout("%s TCP_ESTABLISHED\n", __func__);
>  		con_sock_state_connected(con);
> +		/*
> +		 * If not told otherwise, arm connect_timeout hammer to
> +		 * be able to deal with half-open (in RFC 793 sense)
> +		 * sockets in the interim before we transition to
> +		 * CON_STATE_OPEN and regular request keepalive
> +		 * mechanism (ceph_con_keepalive()) kicks in.
> +		 */
> +		if (con->msgr->connect_timeout) {
> +			dout("arming connect_timeout hammer\n");
> +			schedule_delayed_work(&con->connect_timeout_work,
> +					      con->msgr->connect_timeout);
> +		}
>  		queue_con(con);
>  		break;
>  	default:	/* Everything else is uninteresting */
> @@ -586,6 +608,13 @@ static int con_close_socket(struct ceph_connection *con)
>  	int rc = 0;
>  
>  	dout("con_close_socket on %p sock %p\n", con, con->sock);
> +
> +	/*
> +	 * Sync with con_fault_raise() in handle_connect_timeout() to
> +	 * make sure that SOCK_CLOSED is going to be cleared for good.
> +	 */
> +	cancel_delayed_work_sync(&con->connect_timeout_work);

At first I thought there would be locking problems here, but AFAICS there 
is no real difference between this timer firing and a normal state 
change callback from the socket.

Aside from the timeout nit, this looks right!

sage

> +
>  	if (con->sock) {
>  		rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
>  		sock_release(con->sock);
> @@ -595,8 +624,8 @@ static int con_close_socket(struct ceph_connection *con)
>  	/*
>  	 * Forcibly clear the SOCK_CLOSED flag.  It gets set
>  	 * independent of the connection mutex, and we could have
> -	 * received a socket close event before we had the chance to
> -	 * shut the socket down.
> +	 * received a socket close and/or connect_timeout event
> +	 * before we had the chance to shut the socket down.
>  	 */
>  	con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
>  
> @@ -725,6 +754,7 @@ void ceph_con_init(struct ceph_connection *con, void *private,
>  	INIT_LIST_HEAD(&con->out_queue);
>  	INIT_LIST_HEAD(&con->out_sent);
>  	INIT_DELAYED_WORK(&con->work, con_work);
> +	INIT_DELAYED_WORK(&con->connect_timeout_work, handle_connect_timeout);
>  
>  	con->state = CON_STATE_CLOSED;
>  }
> @@ -2076,6 +2106,7 @@ static int process_connect(struct ceph_connection *con)
>  
>  		WARN_ON(con->state != CON_STATE_NEGOTIATING);
>  		con->state = CON_STATE_OPEN;
> +
>  		con->auth_retry = 0;    /* we authenticated; clear flag */
>  		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
>  		con->connect_seq++;
> @@ -2092,6 +2123,11 @@ static int process_connect(struct ceph_connection *con)
>  
>  		con->delay = 0;      /* reset backoff memory */
>  
> +		if (con->msgr->connect_timeout) {
> +			dout("disarming connect_timeout hammer\n");
> +			cancel_delayed_work(&con->connect_timeout_work);
> +		}
> +
>  		if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
>  			prepare_write_seq(con);
>  			prepare_read_seq(con);
> @@ -2866,10 +2902,12 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
>  			struct ceph_entity_addr *myaddr,
>  			u64 supported_features,
>  			u64 required_features,
> +			unsigned long connect_timeout,
>  			bool nocrc)
>  {
>  	msgr->supported_features = supported_features;
>  	msgr->required_features = required_features;
> +	msgr->connect_timeout = connect_timeout;
>  
>  	spin_lock_init(&msgr->global_seq_lock);
>  
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov Jan. 26, 2014, 10:16 a.m. UTC | #2
On Sat, Jan 25, 2014 at 1:07 AM, Sage Weil <sage@inktank.com> wrote:
> On Tue, 21 Jan 2014, Ilya Dryomov wrote:
>> Keepalive mechanism that we are currently using doesn't handle dead
>> (e.g. half-open in RFC 793 sense) TCP connections: a) it's based on
>> pending ceph_osd_requests which are not necessarily present, and b)
>> keepalive byte is only sent if connection is in CON_STATE_OPEN state
>> because of protocol restrictions.  This does not cover connection
>> handshake and negotiation stages and can lead to kernel client hanging
>> forever.  Fix it by forcibly resetting the connection if after two
>> mount_timeouts we still haven't transitioned to CON_STATE_OPEN.
>>
>> Fixes: http://tracker.ceph.com/issues/7139
>>
>> Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
>> ---
>>  include/linux/ceph/messenger.h |    5 +++++
>>  net/ceph/ceph_common.c         |    1 +
>>  net/ceph/messenger.c           |   42 ++++++++++++++++++++++++++++++++++++++--
>>  3 files changed, 46 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
>> index 20ee8b63a968..25d3ec8bcdde 100644
>> --- a/include/linux/ceph/messenger.h
>> +++ b/include/linux/ceph/messenger.h
>> @@ -62,6 +62,8 @@ struct ceph_messenger {
>>
>>       u64 supported_features;
>>       u64 required_features;
>> +
>> +     unsigned long connect_timeout;  /* jiffies */
>>  };
>>
>>  enum ceph_msg_data_type {
>> @@ -240,6 +242,8 @@ struct ceph_connection {
>>
>>       struct delayed_work work;           /* send|recv work */
>>       unsigned long       delay;          /* current delay interval */
>> +
>> +     struct delayed_work connect_timeout_work;
>>  };
>>
>>
>> @@ -257,6 +261,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
>>                       struct ceph_entity_addr *myaddr,
>>                       u64 supported_features,
>>                       u64 required_features,
>> +                     unsigned long connect_timeout,
>>                       bool nocrc);
>>
>>  extern void ceph_con_init(struct ceph_connection *con, void *private,
>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>> index 67d7721d237e..bc41e5a392a4 100644
>> --- a/net/ceph/ceph_common.c
>> +++ b/net/ceph/ceph_common.c
>> @@ -511,6 +511,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
>>       ceph_messenger_init(&client->msgr, myaddr,
>>               client->supported_features,
>>               client->required_features,
>> +             2 * client->options->mount_timeout * HZ,
>
> Any specific reason for 2x?  I would like 1/2 if I had to pull a number
> out of a hat.  OTOH, mount_timeout could be 0, too, so it might make more
> sense to make this a separate mount option entirely.  Either that, or just
> make this a separate hard-coded value that is not related to
> mount_timeout.

Yes, my logic for 2x was basically to not impose anything on the rest
the system.  Apart from some cephfs-specific stuff, mount_timeout is
used in __ceph_open_session, where 1/2 would theoretically interfere
with the wait-for-maps loop by virtue of shutting down the socket too
early.

On mount_timeout=0 connect_timeout hammer isn't armed.  This was useful
in testing, and it doesn't seem to change existing semantics at all.
On the contrary, as it is, mount_timeout == 0 is a possible infinite
loop anyway, so not arming connect_timeout (i.e. not defending against
another endless hang) maps onto it quite nicely ;)

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov Jan. 26, 2014, 12:43 p.m. UTC | #3
I've looked into this some more and I now think connect_timeout
approach not the best way to fix this.  I think a better way is to go
through all connection and negotiation waits on both rbd and cephfs
sides and make sure they are interruptible and have a timeout attached.
Only if that turns out to be not feasible should we go with this
connect_timeout bolt-on.

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Jan. 26, 2014, 4:32 p.m. UTC | #4
On Sun, 26 Jan 2014, Ilya Dryomov wrote:
> On Sat, Jan 25, 2014 at 1:07 AM, Sage Weil <sage@inktank.com> wrote:
> > On Tue, 21 Jan 2014, Ilya Dryomov wrote:
> >> Keepalive mechanism that we are currently using doesn't handle dead
> >> (e.g. half-open in RFC 793 sense) TCP connections: a) it's based on
> >> pending ceph_osd_requests which are not necessarily present, and b)
> >> keepalive byte is only sent if connection is in CON_STATE_OPEN state
> >> because of protocol restrictions.  This does not cover connection
> >> handshake and negotiation stages and can lead to kernel client hanging
> >> forever.  Fix it by forcibly resetting the connection if after two
> >> mount_timeouts we still haven't transitioned to CON_STATE_OPEN.
> >>
> >> Fixes: http://tracker.ceph.com/issues/7139
> >>
> >> Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
> >> ---
> >>  include/linux/ceph/messenger.h |    5 +++++
> >>  net/ceph/ceph_common.c         |    1 +
> >>  net/ceph/messenger.c           |   42 ++++++++++++++++++++++++++++++++++++++--
> >>  3 files changed, 46 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> >> index 20ee8b63a968..25d3ec8bcdde 100644
> >> --- a/include/linux/ceph/messenger.h
> >> +++ b/include/linux/ceph/messenger.h
> >> @@ -62,6 +62,8 @@ struct ceph_messenger {
> >>
> >>       u64 supported_features;
> >>       u64 required_features;
> >> +
> >> +     unsigned long connect_timeout;  /* jiffies */
> >>  };
> >>
> >>  enum ceph_msg_data_type {
> >> @@ -240,6 +242,8 @@ struct ceph_connection {
> >>
> >>       struct delayed_work work;           /* send|recv work */
> >>       unsigned long       delay;          /* current delay interval */
> >> +
> >> +     struct delayed_work connect_timeout_work;
> >>  };
> >>
> >>
> >> @@ -257,6 +261,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
> >>                       struct ceph_entity_addr *myaddr,
> >>                       u64 supported_features,
> >>                       u64 required_features,
> >> +                     unsigned long connect_timeout,
> >>                       bool nocrc);
> >>
> >>  extern void ceph_con_init(struct ceph_connection *con, void *private,
> >> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> >> index 67d7721d237e..bc41e5a392a4 100644
> >> --- a/net/ceph/ceph_common.c
> >> +++ b/net/ceph/ceph_common.c
> >> @@ -511,6 +511,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
> >>       ceph_messenger_init(&client->msgr, myaddr,
> >>               client->supported_features,
> >>               client->required_features,
> >> +             2 * client->options->mount_timeout * HZ,
> >
> > Any specific reason for 2x?  I would like 1/2 if I had to pull a number
> > out of a hat.  OTOH, mount_timeout could be 0, too, so it might make more
> > sense to make this a separate mount option entirely.  Either that, or just
> > make this a separate hard-coded value that is not related to
> > mount_timeout.
> 
> Yes, my logic for 2x was basically to not impose anything on the rest
> the system.  Apart from some cephfs-specific stuff, mount_timeout is
> used in __ceph_open_session, where 1/2 would theoretically interfere
> with the wait-for-maps loop by virtue of shutting down the socket too
> early.

If this is essentially manifesting as a socket reset/disconnect, then 
having it at 1/2 should trigger a ceph_fault and then reconnect within the 
messenger layer and give us a second chance before the mount_timeout 
expires.  At least, that's the way I'm reading the con_work:

		if ((fault = con_sock_closed(con))) {
			dout("%s: con %p SOCK_CLOSED\n", __func__, con);
			break;
		}

...

	if (fault)
		con_fault(con);

I still lean toward a separately defined timeout here...

> On mount_timeout=0 connect_timeout hammer isn't armed.  This was useful
> in testing, and it doesn't seem to change existing semantics at all.
> On the contrary, as it is, mount_timeout == 0 is a possible infinite
> loop anyway, so not arming connect_timeout (i.e. not defending against
> another endless hang) maps onto it quite nicely ;)

Sounds good.

sage
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 20ee8b63a968..25d3ec8bcdde 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -62,6 +62,8 @@  struct ceph_messenger {
 
 	u64 supported_features;
 	u64 required_features;
+
+	unsigned long connect_timeout;	/* jiffies */
 };
 
 enum ceph_msg_data_type {
@@ -240,6 +242,8 @@  struct ceph_connection {
 
 	struct delayed_work work;	    /* send|recv work */
 	unsigned long       delay;          /* current delay interval */
+
+	struct delayed_work connect_timeout_work;
 };
 
 
@@ -257,6 +261,7 @@  extern void ceph_messenger_init(struct ceph_messenger *msgr,
 			struct ceph_entity_addr *myaddr,
 			u64 supported_features,
 			u64 required_features,
+			unsigned long connect_timeout,
 			bool nocrc);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721d237e..bc41e5a392a4 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -511,6 +511,7 @@  struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
 	ceph_messenger_init(&client->msgr, myaddr,
 		client->supported_features,
 		client->required_features,
+		2 * client->options->mount_timeout * HZ,
 		ceph_test_opt(client, NOCRC));
 
 	/* subsystems */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c7e0143d24f1..65dea6c8c6f8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -385,6 +385,16 @@  static void con_fault_raise(struct ceph_connection *con)
 	queue_con(con);
 }
 
+static void handle_connect_timeout(struct work_struct *work)
+{
+	struct ceph_connection *con = container_of(work,
+						   struct ceph_connection,
+						   connect_timeout_work.work);
+
+	dout("%s connect timeout\n", __func__);
+	con_fault_raise(con);
+}
+
 
 /*
  * socket callback functions
@@ -447,6 +457,18 @@  static void ceph_sock_state_change(struct sock *sk)
 	case TCP_ESTABLISHED:
 		dout("%s TCP_ESTABLISHED\n", __func__);
 		con_sock_state_connected(con);
+		/*
+		 * If not told otherwise, arm connect_timeout hammer to
+		 * be able to deal with half-open (in RFC 793 sense)
+		 * sockets in the interim before we transition to
+		 * CON_STATE_OPEN and regular request keepalive
+		 * mechanism (ceph_con_keepalive()) kicks in.
+		 */
+		if (con->msgr->connect_timeout) {
+			dout("arming connect_timeout hammer\n");
+			schedule_delayed_work(&con->connect_timeout_work,
+					      con->msgr->connect_timeout);
+		}
 		queue_con(con);
 		break;
 	default:	/* Everything else is uninteresting */
@@ -586,6 +608,13 @@  static int con_close_socket(struct ceph_connection *con)
 	int rc = 0;
 
 	dout("con_close_socket on %p sock %p\n", con, con->sock);
+
+	/*
+	 * Sync with con_fault_raise() in handle_connect_timeout() to
+	 * make sure that SOCK_CLOSED is going to be cleared for good.
+	 */
+	cancel_delayed_work_sync(&con->connect_timeout_work);
+
 	if (con->sock) {
 		rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
 		sock_release(con->sock);
@@ -595,8 +624,8 @@  static int con_close_socket(struct ceph_connection *con)
 	/*
 	 * Forcibly clear the SOCK_CLOSED flag.  It gets set
 	 * independent of the connection mutex, and we could have
-	 * received a socket close event before we had the chance to
-	 * shut the socket down.
+	 * received a socket close and/or connect_timeout event
+	 * before we had the chance to shut the socket down.
 	 */
 	con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
 
@@ -725,6 +754,7 @@  void ceph_con_init(struct ceph_connection *con, void *private,
 	INIT_LIST_HEAD(&con->out_queue);
 	INIT_LIST_HEAD(&con->out_sent);
 	INIT_DELAYED_WORK(&con->work, con_work);
+	INIT_DELAYED_WORK(&con->connect_timeout_work, handle_connect_timeout);
 
 	con->state = CON_STATE_CLOSED;
 }
@@ -2076,6 +2106,7 @@  static int process_connect(struct ceph_connection *con)
 
 		WARN_ON(con->state != CON_STATE_NEGOTIATING);
 		con->state = CON_STATE_OPEN;
+
 		con->auth_retry = 0;    /* we authenticated; clear flag */
 		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
 		con->connect_seq++;
@@ -2092,6 +2123,11 @@  static int process_connect(struct ceph_connection *con)
 
 		con->delay = 0;      /* reset backoff memory */
 
+		if (con->msgr->connect_timeout) {
+			dout("disarming connect_timeout hammer\n");
+			cancel_delayed_work(&con->connect_timeout_work);
+		}
+
 		if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
 			prepare_write_seq(con);
 			prepare_read_seq(con);
@@ -2866,10 +2902,12 @@  void ceph_messenger_init(struct ceph_messenger *msgr,
 			struct ceph_entity_addr *myaddr,
 			u64 supported_features,
 			u64 required_features,
+			unsigned long connect_timeout,
 			bool nocrc)
 {
 	msgr->supported_features = supported_features;
 	msgr->required_features = required_features;
+	msgr->connect_timeout = connect_timeout;
 
 	spin_lock_init(&msgr->global_seq_lock);