diff mbox series

[v3,17/33] nbd/client-connection: implement connection retry

Message ID 20210416080911.83197-18-vsementsov@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series block/nbd: rework client connection | expand

Commit Message

Vladimir Sementsov-Ogievskiy April 16, 2021, 8:08 a.m. UTC
Add an option for thread to retry connection until success. We'll use
nbd/client-connection both for reconnect and for initial connection in
nbd_open(), so we need a possibility to use same NBDClientConnection
instance to connect once in nbd_open() and then use retry semantics for
reconnect.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 include/block/nbd.h     |  2 ++
 nbd/client-connection.c | 55 +++++++++++++++++++++++++++++------------
 2 files changed, 41 insertions(+), 16 deletions(-)

Comments

Roman Kagan May 11, 2021, 8:54 p.m. UTC | #1
On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Add an option for thread to retry connection until success. We'll use
> nbd/client-connection both for reconnect and for initial connection in
> nbd_open(), so we need a possibility to use same NBDClientConnection
> instance to connect once in nbd_open() and then use retry semantics for
> reconnect.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  include/block/nbd.h     |  2 ++
>  nbd/client-connection.c | 55 +++++++++++++++++++++++++++++------------
>  2 files changed, 41 insertions(+), 16 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 5d86e6a393..5bb54d831c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -409,6 +409,8 @@ const char *nbd_err_lookup(int err);
>  /* nbd/client-connection.c */
>  typedef struct NBDClientConnection NBDClientConnection;
>  
> +void nbd_client_connection_enable_retry(NBDClientConnection *conn);
> +
>  NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>                                                 bool do_negotiation,
>                                                 const char *export_name,
> diff --git a/nbd/client-connection.c b/nbd/client-connection.c
> index ae4a77f826..002bd91f42 100644
> --- a/nbd/client-connection.c
> +++ b/nbd/client-connection.c
> @@ -36,6 +36,8 @@ struct NBDClientConnection {
>      NBDExportInfo initial_info;
>      bool do_negotiation;
>  
> +    bool do_retry;
> +
>      /*
>       * Result of last attempt. Valid in FAIL and SUCCESS states.
>       * If you want to steal error, don't forget to set pointer to NULL.
> @@ -52,6 +54,15 @@ struct NBDClientConnection {
>      Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
>  };
>  
> +/*
> + * The function isn't protected by any mutex, so call it when thread is not
> + * running.
> + */
> +void nbd_client_connection_enable_retry(NBDClientConnection *conn)
> +{
> +    conn->do_retry = true;
> +}
> +
>  NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>                                                 bool do_negotiation,
>                                                 const char *export_name,
> @@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
>      NBDClientConnection *conn = opaque;
>      bool do_free;
>      int ret;
> +    uint64_t timeout = 1;
> +    uint64_t max_timeout = 16;
> +
> +    while (true) {
> +        conn->sioc = qio_channel_socket_new();
> +
> +        error_free(conn->err);
> +        conn->err = NULL;
> +        conn->updated_info = conn->initial_info;
> +
> +        ret = nbd_connect(conn->sioc, conn->saddr,
> +                          conn->do_negotiation ? &conn->updated_info : NULL,
> +                          conn->tlscreds, &conn->ioc, &conn->err);
> +        conn->updated_info.x_dirty_bitmap = NULL;
> +        conn->updated_info.name = NULL;
> +
> +        if (ret < 0) {
> +            object_unref(OBJECT(conn->sioc));
> +            conn->sioc = NULL;
> +            if (conn->do_retry) {
> +                sleep(timeout);
> +                if (timeout < max_timeout) {
> +                    timeout *= 2;
> +                }
> +                continue;
> +            }
> +        }

How is it supposed to get canceled?

Roman.

> -    conn->sioc = qio_channel_socket_new();
> -
> -    error_free(conn->err);
> -    conn->err = NULL;
> -    conn->updated_info = conn->initial_info;
> -
> -    ret = nbd_connect(conn->sioc, conn->saddr,
> -                      conn->do_negotiation ? &conn->updated_info : NULL,
> -                      conn->tlscreds, &conn->ioc, &conn->err);
> -    if (ret < 0) {
> -        object_unref(OBJECT(conn->sioc));
> -        conn->sioc = NULL;
> +        break;
>      }
>  
> -    conn->updated_info.x_dirty_bitmap = NULL;
> -    conn->updated_info.name = NULL;
> -
>      WITH_QEMU_LOCK_GUARD(&conn->mutex) {
>          assert(conn->running);
>          conn->running = false;
> @@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
>          do_free = conn->detached;
>      }
>  
> -
>      if (do_free) {
>          nbd_client_connection_do_free(conn);
>      }
> -- 
> 2.29.2
>
Eric Blake June 3, 2021, 4:17 p.m. UTC | #2
On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Add an option for thread to retry connection until success. We'll use

for a thread to retry connection until it succeeds.

> nbd/client-connection both for reconnect and for initial connection in
> nbd_open(), so we need a possibility to use same NBDClientConnection
> instance to connect once in nbd_open() and then use retry semantics for
> reconnect.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  include/block/nbd.h     |  2 ++
>  nbd/client-connection.c | 55 +++++++++++++++++++++++++++++------------
>  2 files changed, 41 insertions(+), 16 deletions(-)
> 
> +++ b/nbd/client-connection.c
> @@ -36,6 +36,8 @@ struct NBDClientConnection {
>      NBDExportInfo initial_info;
>      bool do_negotiation;
>  
> +    bool do_retry;
> +
>      /*
>       * Result of last attempt. Valid in FAIL and SUCCESS states.
>       * If you want to steal error, don't forget to set pointer to NULL.
> @@ -52,6 +54,15 @@ struct NBDClientConnection {
>      Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
>  };
>  
> +/*
> + * The function isn't protected by any mutex, so call it when thread is not

so only call it when the thread is not yet running

or maybe even

only call it when the client connection attempt has not yet started

> + * running.
> + */
> +void nbd_client_connection_enable_retry(NBDClientConnection *conn)
> +{
> +    conn->do_retry = true;
> +}
> +
>  NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>                                                 bool do_negotiation,
>                                                 const char *export_name,
> @@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
>      NBDClientConnection *conn = opaque;
>      bool do_free;
>      int ret;
> +    uint64_t timeout = 1;
> +    uint64_t max_timeout = 16;
> +
> +    while (true) {
> +        conn->sioc = qio_channel_socket_new();
> +
> +        error_free(conn->err);
> +        conn->err = NULL;
> +        conn->updated_info = conn->initial_info;
> +
> +        ret = nbd_connect(conn->sioc, conn->saddr,
> +                          conn->do_negotiation ? &conn->updated_info : NULL,
> +                          conn->tlscreds, &conn->ioc, &conn->err);
> +        conn->updated_info.x_dirty_bitmap = NULL;
> +        conn->updated_info.name = NULL;

I'm not quite sure I follow the allocation here: if we passed in
&conn->updated_info which got modified in-place by nbd_connect, then
are we risking a memory leak by ignoring the x_dirty_bitmap and name
set by that call?

> +
> +        if (ret < 0) {
> +            object_unref(OBJECT(conn->sioc));
> +            conn->sioc = NULL;
> +            if (conn->do_retry) {
> +                sleep(timeout);

This is a bare sleep in a function not marked as coroutine_fn.  Do we
need to instead use coroutine sleep for better response to an early
exit if initialization is taking too long?

> +                if (timeout < max_timeout) {
> +                    timeout *= 2;
> +                }
> +                continue;
> +            }
> +        }
>  
> -    conn->sioc = qio_channel_socket_new();
> -
> -    error_free(conn->err);
> -    conn->err = NULL;
> -    conn->updated_info = conn->initial_info;
> -
> -    ret = nbd_connect(conn->sioc, conn->saddr,
> -                      conn->do_negotiation ? &conn->updated_info : NULL,
> -                      conn->tlscreds, &conn->ioc, &conn->err);
> -    if (ret < 0) {
> -        object_unref(OBJECT(conn->sioc));
> -        conn->sioc = NULL;
> +        break;
>      }
>  
> -    conn->updated_info.x_dirty_bitmap = NULL;
> -    conn->updated_info.name = NULL;
> -
>      WITH_QEMU_LOCK_GUARD(&conn->mutex) {
>          assert(conn->running);
>          conn->running = false;
> @@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
>          do_free = conn->detached;
>      }
>  
> -
>      if (do_free) {
>          nbd_client_connection_do_free(conn);

Spurious hunk?
Vladimir Sementsov-Ogievskiy June 3, 2021, 5:49 p.m. UTC | #3
03.06.2021 19:17, Eric Blake wrote:
> On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> Add an option for thread to retry connection until success. We'll use
> 
> for a thread to retry connection until it succeeds.
> 
>> nbd/client-connection both for reconnect and for initial connection in
>> nbd_open(), so we need a possibility to use same NBDClientConnection
>> instance to connect once in nbd_open() and then use retry semantics for
>> reconnect.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   include/block/nbd.h     |  2 ++
>>   nbd/client-connection.c | 55 +++++++++++++++++++++++++++++------------
>>   2 files changed, 41 insertions(+), 16 deletions(-)
>>
>> +++ b/nbd/client-connection.c
>> @@ -36,6 +36,8 @@ struct NBDClientConnection {
>>       NBDExportInfo initial_info;
>>       bool do_negotiation;
>>   
>> +    bool do_retry;
>> +
>>       /*
>>        * Result of last attempt. Valid in FAIL and SUCCESS states.
>>        * If you want to steal error, don't forget to set pointer to NULL.
>> @@ -52,6 +54,15 @@ struct NBDClientConnection {
>>       Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
>>   };
>>   
>> +/*
>> + * The function isn't protected by any mutex, so call it when thread is not
> 
> so only call it when the thread is not yet running
> 
> or maybe even
> 
> only call it when the client connection attempt has not yet started
> 
>> + * running.
>> + */
>> +void nbd_client_connection_enable_retry(NBDClientConnection *conn)
>> +{
>> +    conn->do_retry = true;
>> +}
>> +
>>   NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>>                                                  bool do_negotiation,
>>                                                  const char *export_name,
>> @@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
>>       NBDClientConnection *conn = opaque;
>>       bool do_free;
>>       int ret;
>> +    uint64_t timeout = 1;
>> +    uint64_t max_timeout = 16;
>> +
>> +    while (true) {
>> +        conn->sioc = qio_channel_socket_new();
>> +
>> +        error_free(conn->err);
>> +        conn->err = NULL;
>> +        conn->updated_info = conn->initial_info;
>> +
>> +        ret = nbd_connect(conn->sioc, conn->saddr,
>> +                          conn->do_negotiation ? &conn->updated_info : NULL,
>> +                          conn->tlscreds, &conn->ioc, &conn->err);
>> +        conn->updated_info.x_dirty_bitmap = NULL;
>> +        conn->updated_info.name = NULL;
> 
> I'm not quite sure I follow the allocation here: if we passed in
> &conn->updated_info which got modified in-place by nbd_connect, then
> are we risking a memory leak by ignoring the x_dirty_bitmap and name
> set by that call?

Yes, that looks strange :\. Will check when prepare new version and fix or leave a comment here.

> 
>> +
>> +        if (ret < 0) {
>> +            object_unref(OBJECT(conn->sioc));
>> +            conn->sioc = NULL;
>> +            if (conn->do_retry) {
>> +                sleep(timeout);
> 
> This is a bare sleep in a function not marked as coroutine_fn.  Do we
> need to instead use coroutine sleep for better response to an early
> exit if initialization is taking too long?

We are in a separate, by-hand created thread, which knows nothing about coroutines, iothreads, aio contexts etc.. I think bare sleep is what should be here.

> 
>> +                if (timeout < max_timeout) {
>> +                    timeout *= 2;
>> +                }
>> +                continue;
>> +            }
>> +        }
>>   
>> -    conn->sioc = qio_channel_socket_new();
>> -
>> -    error_free(conn->err);
>> -    conn->err = NULL;
>> -    conn->updated_info = conn->initial_info;
>> -
>> -    ret = nbd_connect(conn->sioc, conn->saddr,
>> -                      conn->do_negotiation ? &conn->updated_info : NULL,
>> -                      conn->tlscreds, &conn->ioc, &conn->err);
>> -    if (ret < 0) {
>> -        object_unref(OBJECT(conn->sioc));
>> -        conn->sioc = NULL;
>> +        break;
>>       }
>>   
>> -    conn->updated_info.x_dirty_bitmap = NULL;
>> -    conn->updated_info.name = NULL;
>> -
>>       WITH_QEMU_LOCK_GUARD(&conn->mutex) {
>>           assert(conn->running);
>>           conn->running = false;
>> @@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
>>           do_free = conn->detached;
>>       }
>>   
>> -
>>       if (do_free) {
>>           nbd_client_connection_do_free(conn);
> 
> Spurious hunk?
> 

wull drop
Vladimir Sementsov-Ogievskiy June 8, 2021, 10:24 a.m. UTC | #4
11.05.2021 23:54, Roman Kagan wrote:
> On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> Add an option for thread to retry connection until success. We'll use
>> nbd/client-connection both for reconnect and for initial connection in
>> nbd_open(), so we need a possibility to use same NBDClientConnection
>> instance to connect once in nbd_open() and then use retry semantics for
>> reconnect.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   include/block/nbd.h     |  2 ++
>>   nbd/client-connection.c | 55 +++++++++++++++++++++++++++++------------
>>   2 files changed, 41 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/block/nbd.h b/include/block/nbd.h
>> index 5d86e6a393..5bb54d831c 100644
>> --- a/include/block/nbd.h
>> +++ b/include/block/nbd.h
>> @@ -409,6 +409,8 @@ const char *nbd_err_lookup(int err);
>>   /* nbd/client-connection.c */
>>   typedef struct NBDClientConnection NBDClientConnection;
>>   
>> +void nbd_client_connection_enable_retry(NBDClientConnection *conn);
>> +
>>   NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>>                                                  bool do_negotiation,
>>                                                  const char *export_name,
>> diff --git a/nbd/client-connection.c b/nbd/client-connection.c
>> index ae4a77f826..002bd91f42 100644
>> --- a/nbd/client-connection.c
>> +++ b/nbd/client-connection.c
>> @@ -36,6 +36,8 @@ struct NBDClientConnection {
>>       NBDExportInfo initial_info;
>>       bool do_negotiation;
>>   
>> +    bool do_retry;
>> +
>>       /*
>>        * Result of last attempt. Valid in FAIL and SUCCESS states.
>>        * If you want to steal error, don't forget to set pointer to NULL.
>> @@ -52,6 +54,15 @@ struct NBDClientConnection {
>>       Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
>>   };
>>   
>> +/*
>> + * The function isn't protected by any mutex, so call it when thread is not
>> + * running.
>> + */
>> +void nbd_client_connection_enable_retry(NBDClientConnection *conn)
>> +{
>> +    conn->do_retry = true;
>> +}
>> +
>>   NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>>                                                  bool do_negotiation,
>>                                                  const char *export_name,
>> @@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
>>       NBDClientConnection *conn = opaque;
>>       bool do_free;
>>       int ret;
>> +    uint64_t timeout = 1;
>> +    uint64_t max_timeout = 16;
>> +
>> +    while (true) {
>> +        conn->sioc = qio_channel_socket_new();
>> +
>> +        error_free(conn->err);
>> +        conn->err = NULL;
>> +        conn->updated_info = conn->initial_info;
>> +
>> +        ret = nbd_connect(conn->sioc, conn->saddr,
>> +                          conn->do_negotiation ? &conn->updated_info : NULL,
>> +                          conn->tlscreds, &conn->ioc, &conn->err);
>> +        conn->updated_info.x_dirty_bitmap = NULL;
>> +        conn->updated_info.name = NULL;
>> +
>> +        if (ret < 0) {
>> +            object_unref(OBJECT(conn->sioc));
>> +            conn->sioc = NULL;
>> +            if (conn->do_retry) {
>> +                sleep(timeout);
>> +                if (timeout < max_timeout) {
>> +                    timeout *= 2;
>> +                }
>> +                continue;
>> +            }
>> +        }
> 
> How is it supposed to get canceled?
> 

Next commit does it
  
>> -    conn->sioc = qio_channel_socket_new();
>> -
>> -    error_free(conn->err);
>> -    conn->err = NULL;
>> -    conn->updated_info = conn->initial_info;
>> -
>> -    ret = nbd_connect(conn->sioc, conn->saddr,
>> -                      conn->do_negotiation ? &conn->updated_info : NULL,
>> -                      conn->tlscreds, &conn->ioc, &conn->err);
>> -    if (ret < 0) {
>> -        object_unref(OBJECT(conn->sioc));
>> -        conn->sioc = NULL;
>> +        break;
>>       }
>>   
>> -    conn->updated_info.x_dirty_bitmap = NULL;
>> -    conn->updated_info.name = NULL;
>> -
>>       WITH_QEMU_LOCK_GUARD(&conn->mutex) {
>>           assert(conn->running);
>>           conn->running = false;
>> @@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
>>           do_free = conn->detached;
>>       }
>>   
>> -
>>       if (do_free) {
>>           nbd_client_connection_do_free(conn);
>>       }
>> -- 
>> 2.29.2
>>
Vladimir Sementsov-Ogievskiy June 8, 2021, 10:38 a.m. UTC | #5
03.06.2021 20:49, Vladimir Sementsov-Ogievskiy wrote:
> 03.06.2021 19:17, Eric Blake wrote:
>> On Fri, Apr 16, 2021 at 11:08:55AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>> Add an option for thread to retry connection until success. We'll use
>>
>> for a thread to retry connection until it succeeds.
>>
>>> nbd/client-connection both for reconnect and for initial connection in
>>> nbd_open(), so we need a possibility to use same NBDClientConnection
>>> instance to connect once in nbd_open() and then use retry semantics for
>>> reconnect.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
>>>   include/block/nbd.h     |  2 ++
>>>   nbd/client-connection.c | 55 +++++++++++++++++++++++++++++------------
>>>   2 files changed, 41 insertions(+), 16 deletions(-)
>>>
>>> +++ b/nbd/client-connection.c
>>> @@ -36,6 +36,8 @@ struct NBDClientConnection {
>>>       NBDExportInfo initial_info;
>>>       bool do_negotiation;
>>> +    bool do_retry;
>>> +
>>>       /*
>>>        * Result of last attempt. Valid in FAIL and SUCCESS states.
>>>        * If you want to steal error, don't forget to set pointer to NULL.
>>> @@ -52,6 +54,15 @@ struct NBDClientConnection {
>>>       Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
>>>   };
>>> +/*
>>> + * The function isn't protected by any mutex, so call it when thread is not
>>
>> so only call it when the thread is not yet running
>>
>> or maybe even
>>
>> only call it when the client connection attempt has not yet started
>>
>>> + * running.
>>> + */
>>> +void nbd_client_connection_enable_retry(NBDClientConnection *conn)
>>> +{
>>> +    conn->do_retry = true;
>>> +}
>>> +
>>>   NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
>>>                                                  bool do_negotiation,
>>>                                                  const char *export_name,
>>> @@ -144,24 +155,37 @@ static void *connect_thread_func(void *opaque)
>>>       NBDClientConnection *conn = opaque;
>>>       bool do_free;
>>>       int ret;
>>> +    uint64_t timeout = 1;
>>> +    uint64_t max_timeout = 16;
>>> +
>>> +    while (true) {
>>> +        conn->sioc = qio_channel_socket_new();
>>> +
>>> +        error_free(conn->err);
>>> +        conn->err = NULL;
>>> +        conn->updated_info = conn->initial_info;
>>> +
>>> +        ret = nbd_connect(conn->sioc, conn->saddr,
>>> +                          conn->do_negotiation ? &conn->updated_info : NULL,
>>> +                          conn->tlscreds, &conn->ioc, &conn->err);
>>> +        conn->updated_info.x_dirty_bitmap = NULL;
>>> +        conn->updated_info.name = NULL;
>>
>> I'm not quite sure I follow the allocation here: if we passed in
>> &conn->updated_info which got modified in-place by nbd_connect, then
>> are we risking a memory leak by ignoring the x_dirty_bitmap and name
>> set by that call?
> 
> Yes, that looks strange :\. Will check when prepare new version and fix or leave a comment here.

x_dirty_bitmap and name are not set by nbd_connect, they are IN parameters of nbd_receive_negotiate(). Their allocations are owned by conn->initial_info. So, here we've copied pointers into conn->updated_info. And then zero out them, when they are not needed anymore (and actually, to not store them and not finally return to the user our internal allocations). I'll add a comment here.

> 
>>
>>> +
>>> +        if (ret < 0) {
>>> +            object_unref(OBJECT(conn->sioc));
>>> +            conn->sioc = NULL;
>>> +            if (conn->do_retry) {
>>> +                sleep(timeout);
>>
>> This is a bare sleep in a function not marked as coroutine_fn.  Do we
>> need to instead use coroutine sleep for better response to an early
>> exit if initialization is taking too long?
> 
> We are in a separate, by-hand created thread, which knows nothing about coroutines, iothreads, aio contexts etc.. I think bare sleep is what should be here.
> 
>>
>>> +                if (timeout < max_timeout) {
>>> +                    timeout *= 2;
>>> +                }
>>> +                continue;
>>> +            }
>>> +        }
>>> -    conn->sioc = qio_channel_socket_new();
>>> -
>>> -    error_free(conn->err);
>>> -    conn->err = NULL;
>>> -    conn->updated_info = conn->initial_info;
>>> -
>>> -    ret = nbd_connect(conn->sioc, conn->saddr,
>>> -                      conn->do_negotiation ? &conn->updated_info : NULL,
>>> -                      conn->tlscreds, &conn->ioc, &conn->err);
>>> -    if (ret < 0) {
>>> -        object_unref(OBJECT(conn->sioc));
>>> -        conn->sioc = NULL;
>>> +        break;
>>>       }
>>> -    conn->updated_info.x_dirty_bitmap = NULL;
>>> -    conn->updated_info.name = NULL;
>>> -
>>>       WITH_QEMU_LOCK_GUARD(&conn->mutex) {
>>>           assert(conn->running);
>>>           conn->running = false;
>>> @@ -172,7 +196,6 @@ static void *connect_thread_func(void *opaque)
>>>           do_free = conn->detached;
>>>       }
>>> -
>>>       if (do_free) {
>>>           nbd_client_connection_do_free(conn);
>>
>> Spurious hunk?
>>
> 
> wull drop
>
diff mbox series

Patch

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 5d86e6a393..5bb54d831c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -409,6 +409,8 @@  const char *nbd_err_lookup(int err);
 /* nbd/client-connection.c */
 typedef struct NBDClientConnection NBDClientConnection;
 
+void nbd_client_connection_enable_retry(NBDClientConnection *conn);
+
 NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
                                                bool do_negotiation,
                                                const char *export_name,
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
index ae4a77f826..002bd91f42 100644
--- a/nbd/client-connection.c
+++ b/nbd/client-connection.c
@@ -36,6 +36,8 @@  struct NBDClientConnection {
     NBDExportInfo initial_info;
     bool do_negotiation;
 
+    bool do_retry;
+
     /*
      * Result of last attempt. Valid in FAIL and SUCCESS states.
      * If you want to steal error, don't forget to set pointer to NULL.
@@ -52,6 +54,15 @@  struct NBDClientConnection {
     Coroutine *wait_co; /* nbd_co_establish_connection() wait in yield() */
 };
 
+/*
+ * The function isn't protected by any mutex, so call it when thread is not
+ * running.
+ */
+void nbd_client_connection_enable_retry(NBDClientConnection *conn)
+{
+    conn->do_retry = true;
+}
+
 NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
                                                bool do_negotiation,
                                                const char *export_name,
@@ -144,24 +155,37 @@  static void *connect_thread_func(void *opaque)
     NBDClientConnection *conn = opaque;
     bool do_free;
     int ret;
+    uint64_t timeout = 1;
+    uint64_t max_timeout = 16;
+
+    while (true) {
+        conn->sioc = qio_channel_socket_new();
+
+        error_free(conn->err);
+        conn->err = NULL;
+        conn->updated_info = conn->initial_info;
+
+        ret = nbd_connect(conn->sioc, conn->saddr,
+                          conn->do_negotiation ? &conn->updated_info : NULL,
+                          conn->tlscreds, &conn->ioc, &conn->err);
+        conn->updated_info.x_dirty_bitmap = NULL;
+        conn->updated_info.name = NULL;
+
+        if (ret < 0) {
+            object_unref(OBJECT(conn->sioc));
+            conn->sioc = NULL;
+            if (conn->do_retry) {
+                sleep(timeout);
+                if (timeout < max_timeout) {
+                    timeout *= 2;
+                }
+                continue;
+            }
+        }
 
-    conn->sioc = qio_channel_socket_new();
-
-    error_free(conn->err);
-    conn->err = NULL;
-    conn->updated_info = conn->initial_info;
-
-    ret = nbd_connect(conn->sioc, conn->saddr,
-                      conn->do_negotiation ? &conn->updated_info : NULL,
-                      conn->tlscreds, &conn->ioc, &conn->err);
-    if (ret < 0) {
-        object_unref(OBJECT(conn->sioc));
-        conn->sioc = NULL;
+        break;
     }
 
-    conn->updated_info.x_dirty_bitmap = NULL;
-    conn->updated_info.name = NULL;
-
     WITH_QEMU_LOCK_GUARD(&conn->mutex) {
         assert(conn->running);
         conn->running = false;
@@ -172,7 +196,6 @@  static void *connect_thread_func(void *opaque)
         do_free = conn->detached;
     }
 
-
     if (do_free) {
         nbd_client_connection_do_free(conn);
     }