diff mbox

[v3,36/44] nbd: Improve handling of shutdown requests

Message ID 1461368452-10389-37-git-send-email-eblake@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Eric Blake April 22, 2016, 11:40 p.m. UTC
NBD commit 6d34500b clarified how clients and servers are supposed
to behave before closing a connection. It added NBD_REP_ERR_SHUTDOWN
(for the server to announce it is about to go away during option
haggling, so the client should quit sending NBD_OPT_* other than
NBD_OPT_ABORT) and ESHUTDOWN (for the server to announce it is about
to go away during transmission, so the client should quit sending
NBD_CMD_* other than NBD_CMD_DISC).  It also clarified that
NBD_OPT_ABORT gets a reply, while NBD_CMD_DISC does not.

This patch merely adds the missing reply to NBD_OPT_ABORT and teaches
the client to recognize server errors.  Actually teaching the server
to send NBD_REP_ERR_SHUTDOWN or ESHUTDOWN would require knowing that
the server has been requested to shut down soon (maybe we could do
that by installing a SIGINT handler in qemu-nbd, which transitions
from RUNNING to a new state that waits for the client to react,
rather than just out-right quitting).

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 include/block/nbd.h | 13 +++++++++----
 nbd/nbd-internal.h  |  1 +
 nbd/client.c        | 16 ++++++++++++++++
 nbd/server.c        | 16 +++++++++++++++-
 4 files changed, 41 insertions(+), 5 deletions(-)

Comments

Alex Bligh April 25, 2016, 9:47 a.m. UTC | #1
On 23 Apr 2016, at 00:40, Eric Blake <eblake@redhat.com> wrote:

> NBD commit 6d34500b clarified how clients and servers are supposed
> to behave before closing a connection. It added NBD_REP_ERR_SHUTDOWN
> (for the server to announce it is about to go away during option
> haggling, so the client should quit sending NBD_OPT_* other than
> NBD_OPT_ABORT) and ESHUTDOWN (for the server to announce it is about
> to go away during transmission, so the client should quit sending
> NBD_CMD_* other than NBD_CMD_DISC).  It also clarified that
> NBD_OPT_ABORT gets a reply, while NBD_CMD_DISC does not.
> 
> This patch merely adds the missing reply to NBD_OPT_ABORT and teaches
> the client to recognize server errors.  Actually teaching the server
> to send NBD_REP_ERR_SHUTDOWN or ESHUTDOWN would require knowing that
> the server has been requested to shut down soon (maybe we could do
> that by installing a SIGINT handler in qemu-nbd, which transitions
> from RUNNING to a new state that waits for the client to react,
> rather than just out-right quitting).
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
> include/block/nbd.h | 13 +++++++++----
> nbd/nbd-internal.h  |  1 +
> nbd/client.c        | 16 ++++++++++++++++
> nbd/server.c        | 16 +++++++++++++++-
> 4 files changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index d707761..2fd1a67 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -82,12 +82,17 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_FLAG_C_NO_ZEROES      (1 << 1) /* End handshake without zeroes. */
> 
> /* Reply types. */
> +#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
> +
> #define NBD_REP_ACK             (1)             /* Data sending finished. */
> #define NBD_REP_SERVER          (2)             /* Export description. */
> -#define NBD_REP_ERR_UNSUP       ((UINT32_C(1) << 31) | 1) /* Unknown option. */
> -#define NBD_REP_ERR_POLICY      ((UINT32_C(1) << 31) | 2) /* Server denied */
> -#define NBD_REP_ERR_INVALID     ((UINT32_C(1) << 31) | 3) /* Invalid length. */
> -#define NBD_REP_ERR_TLS_REQD    ((UINT32_C(1) << 31) | 5) /* TLS required */
> +
> +#define NBD_REP_ERR_UNSUP       NBD_REP_ERR(1)  /* Unknown option. */
> +#define NBD_REP_ERR_POLICY      NBD_REP_ERR(2)  /* Server denied */
> +#define NBD_REP_ERR_INVALID     NBD_REP_ERR(3)  /* Invalid length */
> +#define NBD_REP_ERR_PLATFORM    NBD_REP_ERR(4)  /* Not compiled in */
> +#define NBD_REP_ERR_TLS_REQD    NBD_REP_ERR(5)  /* TLS required */
> +#define NBD_REP_ERR_SHUTDOWN    NBD_REP_ERR(7)  /* Server shutting down */
> 
> /* Request flags, sent from client to server during transmission phase */
> #define NBD_CMD_FLAG_FUA        (1 << 0)
> diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
> index 95069db..0d40b1f 100644
> --- a/nbd/nbd-internal.h
> +++ b/nbd/nbd-internal.h
> @@ -91,6 +91,7 @@
> #define NBD_ENOMEM     12
> #define NBD_EINVAL     22
> #define NBD_ENOSPC     28
> +#define NBD_ESHUTDOWN  108
> 
> static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
> {
> diff --git a/nbd/client.c b/nbd/client.c
> index 68e9473..4140d13 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -34,6 +34,8 @@ static int nbd_errno_to_system_errno(int err)
>         return ENOMEM;
>     case NBD_ENOSPC:
>         return ENOSPC;
> +    case NBD_ESHUTDOWN:
> +        return ESHUTDOWN;
>     default:
>         TRACE("Squashing unexpected error %d to EINVAL", err);
>         /* fallthrough */
> @@ -210,11 +212,21 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
>                    reply->option);
>         break;
> 
> +    case NBD_REP_ERR_PLATFORM:
> +        error_setg(errp, "Server lacks support for option %" PRIx32,
> +                   reply->option);
> +        break;
> +
>     case NBD_REP_ERR_TLS_REQD:
>         error_setg(errp, "TLS negotiation required before option %" PRIx32,
>                    reply->option);
>         break;
> 
> +    case NBD_REP_ERR_SHUTDOWN:
> +        error_setg(errp, "Server shutting down before option %" PRIx32,
> +                   reply->option);
> +        break;
> +
>     default:
>         error_setg(errp, "Unknown error code when asking for option %" PRIx32,
>                    reply->option);
> @@ -754,6 +766,10 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
>         LOG("invalid magic (got 0x%" PRIx32 ")", magic);
>         return -EINVAL;
>     }
> +    if (reply->error == ESHUTDOWN) {
> +        LOG("server shutting down");
> +        return -EINVAL;
> +    }
>     return 0;
> }
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index dadc928..fa6a994 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -39,6 +39,8 @@ static int system_errno_to_nbd_errno(int err)
>     case EFBIG:
>     case ENOSPC:
>         return NBD_ENOSPC;
> +    case ESHUTDOWN:
> +        return NBD_ESHUTDOWN;
>     case EINVAL:
>     default:
>         return NBD_EINVAL;
> @@ -484,6 +486,10 @@ static int nbd_negotiate_options(NBDClient *client)
>                 if (ret < 0) {
>                     return ret;
>                 }
> +                /* Let the client keep trying, unless they asked to quit */
> +                if (clientflags == NBD_OPT_ABORT) {

OK that's totally confusing. clientflags is not the client flags. clientflags
is the NBD option ID, which happens to be the two bytes after the NBD OPT magic,
which is the client flags if we were doing oldstyle negotiation, not newstyle
negotiation.

Except:

> +                    return -EINVAL;
> +                }
>                 break;
>             }
>         } else if (fixedNewstyle) {

So the above is for NewStyle (not fixedNewStyle)?

In which case more than one option isn't even supported, so what's the
stuff purporting to handle TLS doing there?

Confused ...

> @@ -496,7 +502,15 @@ static int nbd_negotiate_options(NBDClient *client)
>                 break;
> 
>             case NBD_OPT_ABORT:
> -                return -EINVAL;
> +                /* NBD spec says we must reply before disconnecting,
> +                 * but that we must also tolerate guests that don't
> +                 * wait for our reply. */
> +                ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
> +                                             clientflags);
> +                if (!ret) {
> +                    ret = -EINVAL;
> +                }
> +                return ret;
> 
>             case NBD_OPT_EXPORT_NAME:
>                 return nbd_negotiate_handle_export_name(client, length);
> -- 
> 2.5.5
> 
>
Eric Blake April 25, 2016, 7:20 p.m. UTC | #2
On 04/25/2016 03:47 AM, Alex Bligh wrote:
> 
> On 23 Apr 2016, at 00:40, Eric Blake <eblake@redhat.com> wrote:
> 
>> NBD commit 6d34500b clarified how clients and servers are supposed
>> to behave before closing a connection. It added NBD_REP_ERR_SHUTDOWN
>> (for the server to announce it is about to go away during option
>> haggling, so the client should quit sending NBD_OPT_* other than
>> NBD_OPT_ABORT) and ESHUTDOWN (for the server to announce it is about
>> to go away during transmission, so the client should quit sending
>> NBD_CMD_* other than NBD_CMD_DISC).  It also clarified that
>> NBD_OPT_ABORT gets a reply, while NBD_CMD_DISC does not.
>>
>> This patch merely adds the missing reply to NBD_OPT_ABORT and teaches
>> the client to recognize server errors.  Actually teaching the server
>> to send NBD_REP_ERR_SHUTDOWN or ESHUTDOWN would require knowing that
>> the server has been requested to shut down soon (maybe we could do
>> that by installing a SIGINT handler in qemu-nbd, which transitions
>> from RUNNING to a new state that waits for the client to react,
>> rather than just out-right quitting).
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
>> ---

>> @@ -484,6 +486,10 @@ static int nbd_negotiate_options(NBDClient *client)
>>                 if (ret < 0) {
>>                     return ret;
>>                 }
>> +                /* Let the client keep trying, unless they asked to quit */
>> +                if (clientflags == NBD_OPT_ABORT) {
> 
> OK that's totally confusing. clientflags is not the client flags. clientflags
> is the NBD option ID, which happens to be the two bytes after the NBD OPT magic,
> which is the client flags if we were doing oldstyle negotiation, not newstyle
> negotiation.

Yes, 'clientflags' is a poor name; I should rename it in a separate
patch.   It is the option-negotiation command type.

> 
> Except:
> 
>> +                    return -EINVAL;
>> +                }
>>                 break;
>>             }
>>         } else if (fixedNewstyle) {
> 
> So the above is for NewStyle (not fixedNewStyle)?

The above is for fixedNewStyle when TLS is not negotiated yet; the 'else
if' is for fixedNewStyle after TLS has been negotiated.  Prior to TLS,
we have to special-case NBD_OPT_ABORT and actually quit.

> 
> In which case more than one option isn't even supported, so what's the
> stuff purporting to handle TLS doing there?
> 
> Confused ...

Sounds like a cleanup patch as a prerequisite on my next respin would
help, then.
Alex Bligh April 25, 2016, 7:40 p.m. UTC | #3
On 25 Apr 2016, at 20:20, Eric Blake <eblake@redhat.com> wrote:

>>>        } else if (fixedNewstyle) {
>> 
>> So the above is for NewStyle (not fixedNewStyle)?
> 
> The above is for fixedNewStyle when TLS is not negotiated yet; the 'else
> if' is for fixedNewStyle after TLS has been negotiated.  Prior to TLS,
> we have to special-case NBD_OPT_ABORT and actually quit.

OK. fixedNewStyle is defined as a prerequisite for TLS. I'm hoping
nothing in Qemuland ever did non-fixed NewStyle, and assuming that's
the case I would not even support it (certainly it won't play
nicely with all the other stuff you've been doing).

--
Alex Bligh
Eric Blake April 25, 2016, 7:48 p.m. UTC | #4
On 04/25/2016 01:40 PM, Alex Bligh wrote:
> 
> On 25 Apr 2016, at 20:20, Eric Blake <eblake@redhat.com> wrote:
> 
>>>>        } else if (fixedNewstyle) {
>>>
>>> So the above is for NewStyle (not fixedNewStyle)?
>>
>> The above is for fixedNewStyle when TLS is not negotiated yet; the 'else
>> if' is for fixedNewStyle after TLS has been negotiated.  Prior to TLS,
>> we have to special-case NBD_OPT_ABORT and actually quit.
> 
> OK. fixedNewStyle is defined as a prerequisite for TLS. I'm hoping
> nothing in Qemuland ever did non-fixed NewStyle, and assuming that's
> the case I would not even support it (certainly it won't play
> nicely with all the other stuff you've been doing).

Well, there were some last-minute patches that went into the 2.6
candidates that fixed qemu to actually be a fixedNewStyle server
(without commit 156f6a10, for example, qemu had the very bug of
disconnecting on unknown client options that fixedNewStyle was supposed
to prevent). Fortunately, qemu 2.5 is oldstyle only, and qemu 2.6 is the
first newstyle server, and I think I got the worst of the
interoperability bugs nailed in 2.6, whereas this series is focusing on
the feature enhancements for inclusion in 2.7.
diff mbox

Patch

diff --git a/include/block/nbd.h b/include/block/nbd.h
index d707761..2fd1a67 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -82,12 +82,17 @@  typedef struct nbd_reply nbd_reply;
 #define NBD_FLAG_C_NO_ZEROES      (1 << 1) /* End handshake without zeroes. */

 /* Reply types. */
+#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
+
 #define NBD_REP_ACK             (1)             /* Data sending finished. */
 #define NBD_REP_SERVER          (2)             /* Export description. */
-#define NBD_REP_ERR_UNSUP       ((UINT32_C(1) << 31) | 1) /* Unknown option. */
-#define NBD_REP_ERR_POLICY      ((UINT32_C(1) << 31) | 2) /* Server denied */
-#define NBD_REP_ERR_INVALID     ((UINT32_C(1) << 31) | 3) /* Invalid length. */
-#define NBD_REP_ERR_TLS_REQD    ((UINT32_C(1) << 31) | 5) /* TLS required */
+
+#define NBD_REP_ERR_UNSUP       NBD_REP_ERR(1)  /* Unknown option. */
+#define NBD_REP_ERR_POLICY      NBD_REP_ERR(2)  /* Server denied */
+#define NBD_REP_ERR_INVALID     NBD_REP_ERR(3)  /* Invalid length */
+#define NBD_REP_ERR_PLATFORM    NBD_REP_ERR(4)  /* Not compiled in */
+#define NBD_REP_ERR_TLS_REQD    NBD_REP_ERR(5)  /* TLS required */
+#define NBD_REP_ERR_SHUTDOWN    NBD_REP_ERR(7)  /* Server shutting down */

 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA        (1 << 0)
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 95069db..0d40b1f 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -91,6 +91,7 @@ 
 #define NBD_ENOMEM     12
 #define NBD_EINVAL     22
 #define NBD_ENOSPC     28
+#define NBD_ESHUTDOWN  108

 static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
 {
diff --git a/nbd/client.c b/nbd/client.c
index 68e9473..4140d13 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -34,6 +34,8 @@  static int nbd_errno_to_system_errno(int err)
         return ENOMEM;
     case NBD_ENOSPC:
         return ENOSPC;
+    case NBD_ESHUTDOWN:
+        return ESHUTDOWN;
     default:
         TRACE("Squashing unexpected error %d to EINVAL", err);
         /* fallthrough */
@@ -210,11 +212,21 @@  static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
                    reply->option);
         break;

+    case NBD_REP_ERR_PLATFORM:
+        error_setg(errp, "Server lacks support for option %" PRIx32,
+                   reply->option);
+        break;
+
     case NBD_REP_ERR_TLS_REQD:
         error_setg(errp, "TLS negotiation required before option %" PRIx32,
                    reply->option);
         break;

+    case NBD_REP_ERR_SHUTDOWN:
+        error_setg(errp, "Server shutting down before option %" PRIx32,
+                   reply->option);
+        break;
+
     default:
         error_setg(errp, "Unknown error code when asking for option %" PRIx32,
                    reply->option);
@@ -754,6 +766,10 @@  ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
         LOG("invalid magic (got 0x%" PRIx32 ")", magic);
         return -EINVAL;
     }
+    if (reply->error == ESHUTDOWN) {
+        LOG("server shutting down");
+        return -EINVAL;
+    }
     return 0;
 }

diff --git a/nbd/server.c b/nbd/server.c
index dadc928..fa6a994 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -39,6 +39,8 @@  static int system_errno_to_nbd_errno(int err)
     case EFBIG:
     case ENOSPC:
         return NBD_ENOSPC;
+    case ESHUTDOWN:
+        return NBD_ESHUTDOWN;
     case EINVAL:
     default:
         return NBD_EINVAL;
@@ -484,6 +486,10 @@  static int nbd_negotiate_options(NBDClient *client)
                 if (ret < 0) {
                     return ret;
                 }
+                /* Let the client keep trying, unless they asked to quit */
+                if (clientflags == NBD_OPT_ABORT) {
+                    return -EINVAL;
+                }
                 break;
             }
         } else if (fixedNewstyle) {
@@ -496,7 +502,15 @@  static int nbd_negotiate_options(NBDClient *client)
                 break;

             case NBD_OPT_ABORT:
-                return -EINVAL;
+                /* NBD spec says we must reply before disconnecting,
+                 * but that we must also tolerate guests that don't
+                 * wait for our reply. */
+                ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+                                             clientflags);
+                if (!ret) {
+                    ret = -EINVAL;
+                }
+                return ret;

             case NBD_OPT_EXPORT_NAME:
                 return nbd_negotiate_handle_export_name(client, length);