diff mbox

[16/18] nbd: Support NBD_CMD_CLOSE

Message ID 1460153158-21612-17-git-send-email-eblake@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Eric Blake April 8, 2016, 10:05 p.m. UTC
NBD_CMD_DISC is annoying: the server is not required to reply,
so the client has no choice but to disconnect once it has sent
the message; but depending on timing, the server can see the
disconnect prior to reading the request, and treat things as
an abrupt exit rather than a clean shutdown (which may affect
whether the server properly fsync()s data to disk, and so on).
The new NBD_CMD_CLOSE adds another round of handshake, where
the client waits for the server's action before closing, to
make sure both parties know that it was a clean close rather
than an accidental early disconnect.

In nbd-client.c, nbd_client_close() is called after we have
already exited the normal coroutine context used by all the
other transmission phase handlers, so the code is a bit more
complex: it has to build up a coroutine just for the purpose
of waiting for the server's response.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 include/block/nbd.h |  4 +++-
 block/nbd-client.c  | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 nbd/server.c        | 19 +++++++++++++++++--
 3 files changed, 64 insertions(+), 4 deletions(-)

Comments

Alex Bligh April 9, 2016, 10:50 a.m. UTC | #1
On 8 Apr 2016, at 23:05, Eric Blake <eblake@redhat.com> wrote:

> NBD_CMD_DISC is annoying: the server is not required to reply,
> so the client has no choice but to disconnect once it has sent
> the message; but depending on timing, the server can see the
> disconnect prior to reading the request, and treat things as
> an abrupt exit rather than a clean shutdown (which may affect
> whether the server properly fsync()s data to disk, and so on).
> The new NBD_CMD_CLOSE adds another round of handshake, where
> the client waits for the server's action before closing, to
> make sure both parties know that it was a clean close rather
> than an accidental early disconnect.
> 
> In nbd-client.c, nbd_client_close() is called after we have
> already exited the normal coroutine context used by all the
> other transmission phase handlers, so the code is a bit more
> complex to build up a coroutine just for the purpose of waiting
> for the server's response.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>

Wouter is not yet convinced of the merits of NBD_CMD_CLOSE
so we should probably resist applying this unless / until we
have convinced him of its benefits.

BTW there is nothing to stop you doing an fsync() on ANY
disconnect server side.

Alex


> ---
> include/block/nbd.h |  4 +++-
> block/nbd-client.c  | 45 ++++++++++++++++++++++++++++++++++++++++++++-
> nbd/server.c        | 19 +++++++++++++++++--
> 3 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index d261dbc..4c57754 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_FLAG_SEND_FUA       (1 << 3)        /* Send FUA (Force Unit Access) */
> #define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - rotational media */
> #define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_CLOSE     (1 << 8)        /* Send CLOSE */
> 
> /* New-style handshake (global) flags, sent from server to client, and
>    control what will happen during handshake phase. */
> @@ -99,7 +100,8 @@ enum {
>     NBD_CMD_WRITE = 1,
>     NBD_CMD_DISC = 2,
>     NBD_CMD_FLUSH = 3,
> -    NBD_CMD_TRIM = 4
> +    NBD_CMD_TRIM = 4,
> +    NBD_CMD_CLOSE = 7,
> };
> 
> #define NBD_DEFAULT_PORT	10809
> diff --git a/block/nbd-client.c b/block/nbd-client.c
> index 285025d..f013084 100644
> --- a/block/nbd-client.c
> +++ b/block/nbd-client.c
> @@ -374,6 +374,29 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
>                        false, nbd_reply_ready, NULL, bs);
> }
> 
> +typedef struct NbdCloseCo {
> +    BlockDriverState *bs;
> +    nbd_request request;
> +    nbd_reply reply;
> +    bool done;
> +} NbdCloseCo;
> +
> +static void coroutine_fn nbd_client_close_co(void *opaque)
> +{
> +    NbdCloseCo *closeco = opaque;
> +    NbdClientSession *client = nbd_get_client_session(closeco->bs);
> +    ssize_t ret;
> +
> +    nbd_coroutine_start(client, &closeco->request);
> +    ret = nbd_co_send_request(closeco->bs, &closeco->request, NULL, 0);
> +    if (ret >= 0) {
> +        nbd_co_receive_reply(client, &closeco->request, &closeco->reply,
> +                             NULL, 0);
> +    }
> +    nbd_coroutine_end(client, &closeco->request);
> +    closeco->done = true;
> +}
> +
> void nbd_client_close(BlockDriverState *bs)
> {
>     NbdClientSession *client = nbd_get_client_session(bs);
> @@ -383,8 +406,28 @@ void nbd_client_close(BlockDriverState *bs)
>         return;
>     }
> 
> -    nbd_send_request(client->ioc, &request);
> +    if (client->nbdflags & NBD_FLAG_SEND_CLOSE) {
> +        /* Newer server, wants us to wait for reply before we close */
> +        Coroutine *co;
> +        NbdCloseCo closeco = {
> +            .bs = bs,
> +            .request = { .type = NBD_CMD_CLOSE },
> +        };
> +        AioContext *aio_context;
> 
> +        g_assert(!qemu_in_coroutine());
> +        aio_context = bdrv_get_aio_context(bs);
> +        co = qemu_coroutine_create(nbd_client_close_co);
> +        qemu_coroutine_enter(co, &closeco);
> +        while (!closeco.done) {
> +            aio_poll(aio_context, true);
> +        }
> +    } else {
> +        /* Older server, send request, but no reply will come */
> +        nbd_send_request(client->ioc, &request);
> +    }
> +
> +    /* Regardless of any received errors, the connection is done. */
>     nbd_teardown_connection(bs);
> }
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index e68e83c..2a6eaf2 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -624,7 +624,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
>     char buf[8 + 8 + 8 + 128];
>     int rc;
>     const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
> -                              NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
> +                              NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> +                              NBD_FLAG_SEND_CLOSE);
>     bool oldStyle;
>     size_t len;
> 
> @@ -1244,7 +1245,21 @@ static void nbd_trip(void *opaque)
>         break;
>     case NBD_CMD_DISC:
>         TRACE("Request type is DISCONNECT");
> -        errno = 0;
> +        goto out;
> +    case NBD_CMD_CLOSE:
> +        TRACE("Request type is CLOSE");
> +        if (request.flags || request.from || request.len) {
> +            LOG("bad parameters, skipping flush");
> +            reply.error = EINVAL;
> +        } else {
> +            ret = blk_co_flush(exp->blk);
> +            if (ret < 0) {
> +                LOG("flush failed");
> +                reply.error = -ret;
> +            }
> +        }
> +        /* Attempt to send reply, but even if it fails, we are done */
> +        nbd_co_send_reply(req, &reply, 0);
>         goto out;
>     case NBD_CMD_FLUSH:
>         TRACE("Request type is FLUSH");
> -- 
> 2.5.5
> 
>
Eric Blake April 9, 2016, 11:12 p.m. UTC | #2
On 04/09/2016 04:50 AM, Alex Bligh wrote:
> 
> On 8 Apr 2016, at 23:05, Eric Blake <eblake@redhat.com> wrote:
> 
>> NBD_CMD_DISC is annoying: the server is not required to reply,
>> so the client has no choice but to disconnect once it has sent
>> the message; but depending on timing, the server can see the
>> disconnect prior to reading the request, and treat things as
>> an abrupt exit rather than a clean shutdown (which may affect
>> whether the server properly fsync()s data to disk, and so on).
>> The new NBD_CMD_CLOSE adds another round of handshake, where
>> the client waits for the server's action before closing, to
>> make sure both parties know that it was a clean close rather
>> than an accidental early disconnect.
>>
>> In nbd-client.c, nbd_client_close() is called after we have
>> already exited the normal coroutine context used by all the
>> other transmission phase handlers, so the code is a bit more
>> complex to build up a coroutine just for the purpose of waiting
>> for the server's response.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
> 
> Wouter is not yet convinced of the merits of NBD_CMD_CLOSE
> so we should probably resist applying this unless / until we
> have convinced him of its benefits.
> 
> BTW there is nothing to stop you doing an fsync() on ANY
> disconnect server side.

QEMU clients _already_ take the safe steps of waiting for all in-flight
requests to complete, then sending one final NBD_CMD_FLUSH, before
attempting to send NBD_CMD_DISC.  If I knew how to make QEMU guarantee
that the NBD_CMD_DISC hits the wire (even in TLS mode) rather than being
dropped early, that would be nicer than having to implement this (although
I did learn a bit about QEMU coroutines in implementing this).
Alex Bligh April 10, 2016, 5:28 a.m. UTC | #3
On 10 Apr 2016, at 00:12, Eric Blake <eblake@redhat.com> wrote:

> Qemu clients _already_ do the safe actions of waiting for all inflight
> requests to complete, then sending one final NBD_CMD_FLUSH, before
> attempting to send NBD_CMD_DISC.  If I knew how to make qemu guarantee
> that the NBD_CMD_DISC hits the wire (even in TLS mode) rather than being
> dropped early, that seems nicer than having to implement this (although
> I did learn a bit about qemu coroutines in implementing this).

Thanks. As discussed elsewhere, I think the answer is gnutls_bye(), but
I'm more familiar with OpenSSL and would be concerned that gnutls_bye()
might block.

--
Alex Bligh
diff mbox

Patch

diff --git a/include/block/nbd.h b/include/block/nbd.h
index d261dbc..4c57754 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -70,6 +70,7 @@  typedef struct nbd_reply nbd_reply;
 #define NBD_FLAG_SEND_FUA       (1 << 3)        /* Send FUA (Force Unit Access) */
 #define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - rotational media */
 #define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
+#define NBD_FLAG_SEND_CLOSE     (1 << 8)        /* Send CLOSE */

 /* New-style handshake (global) flags, sent from server to client, and
    control what will happen during handshake phase. */
@@ -99,7 +100,8 @@  enum {
     NBD_CMD_WRITE = 1,
     NBD_CMD_DISC = 2,
     NBD_CMD_FLUSH = 3,
-    NBD_CMD_TRIM = 4
+    NBD_CMD_TRIM = 4,
+    NBD_CMD_CLOSE = 7,
 };

 #define NBD_DEFAULT_PORT	10809
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 285025d..f013084 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -374,6 +374,29 @@  void nbd_client_attach_aio_context(BlockDriverState *bs,
                        false, nbd_reply_ready, NULL, bs);
 }

+typedef struct NbdCloseCo {
+    BlockDriverState *bs;
+    nbd_request request;
+    nbd_reply reply;
+    bool done;
+} NbdCloseCo;
+
+static void coroutine_fn nbd_client_close_co(void *opaque)
+{
+    NbdCloseCo *closeco = opaque;
+    NbdClientSession *client = nbd_get_client_session(closeco->bs);
+    ssize_t ret;
+
+    nbd_coroutine_start(client, &closeco->request);
+    ret = nbd_co_send_request(closeco->bs, &closeco->request, NULL, 0);
+    if (ret >= 0) {
+        nbd_co_receive_reply(client, &closeco->request, &closeco->reply,
+                             NULL, 0);
+    }
+    nbd_coroutine_end(client, &closeco->request);
+    closeco->done = true;
+}
+
 void nbd_client_close(BlockDriverState *bs)
 {
     NbdClientSession *client = nbd_get_client_session(bs);
@@ -383,8 +406,28 @@  void nbd_client_close(BlockDriverState *bs)
         return;
     }

-    nbd_send_request(client->ioc, &request);
+    if (client->nbdflags & NBD_FLAG_SEND_CLOSE) {
+        /* Newer server, wants us to wait for reply before we close */
+        Coroutine *co;
+        NbdCloseCo closeco = {
+            .bs = bs,
+            .request = { .type = NBD_CMD_CLOSE },
+        };
+        AioContext *aio_context;

+        g_assert(!qemu_in_coroutine());
+        aio_context = bdrv_get_aio_context(bs);
+        co = qemu_coroutine_create(nbd_client_close_co);
+        qemu_coroutine_enter(co, &closeco);
+        while (!closeco.done) {
+            aio_poll(aio_context, true);
+        }
+    } else {
+        /* Older server, send request, but no reply will come */
+        nbd_send_request(client->ioc, &request);
+    }
+
+    /* Regardless of any received errors, the connection is done. */
     nbd_teardown_connection(bs);
 }

diff --git a/nbd/server.c b/nbd/server.c
index e68e83c..2a6eaf2 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -624,7 +624,8 @@  static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
     char buf[8 + 8 + 8 + 128];
     int rc;
     const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
-                              NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+                              NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
+                              NBD_FLAG_SEND_CLOSE);
     bool oldStyle;
     size_t len;

@@ -1244,7 +1245,21 @@  static void nbd_trip(void *opaque)
         break;
     case NBD_CMD_DISC:
         TRACE("Request type is DISCONNECT");
-        errno = 0;
+        goto out;
+    case NBD_CMD_CLOSE:
+        TRACE("Request type is CLOSE");
+        if (request.flags || request.from || request.len) {
+            LOG("bad parameters, skipping flush");
+            reply.error = EINVAL;
+        } else {
+            ret = blk_co_flush(exp->blk);
+            if (ret < 0) {
+                LOG("flush failed");
+                reply.error = -ret;
+            }
+        }
+        /* Attempt to send reply, but even if it fails, we are done */
+        nbd_co_send_reply(req, &reply, 0);
         goto out;
     case NBD_CMD_FLUSH:
         TRACE("Request type is FLUSH");