diff mbox series

[libnbd,v2,05/23] states: Prepare to receive 64-bit replies

Message ID 20221114225158.2186742-6-eblake@redhat.com (mailing list archive)
State New, archived
Headers show
Series libnbd 64-bit NBD extensions | expand

Commit Message

Eric Blake Nov. 14, 2022, 10:51 p.m. UTC
Support receiving headers for 64-bit replies if extended headers were
negotiated.  We already insist that the server not send us too much
payload in one reply, so we can exploit that and merge the 64-bit
length back into a normalized 32-bit field for the rest of the payload
length calculations.  The NBD protocol specifically documents that
extended mode takes precedence over structured replies, and that there
are no simple replies in extended mode.  We can also take advantage of
the fact that the handle field is at the same offset in all the various
reply types.

Note that if we negotiate extended headers, but a non-compliant server
replies with a non-extended header, this patch will stall waiting for
the server to send more bytes rather than noticing that the magic
number is wrong (for aio operations, you'll get a magic number
mismatch once you send a second command that elicits a reply; but for
blocking operations, we basically deadlock).  The easy alternative
would be to read just the first 4 bytes of magic, then determine how
many more bytes to expect; but that would require more states and
syscalls, and is not worth it since the typical server will be compliant.
The other alternative is what the next patch implements: teaching
REPLY.RECV_REPLY to specifically check for a magic number mismatch on
short reads that were at least long enough to transmit the magic.

At this point, h->extended_headers is permanently false (we can't
enable it until all other aspects of the protocol have likewise been
converted).
---
 lib/internal.h                      |  1 +
 generator/states-reply-structured.c | 52 +++++++++++++++++++----------
 generator/states-reply.c            | 31 +++++++++++------
 3 files changed, 57 insertions(+), 27 deletions(-)
diff mbox series

Patch

diff --git a/lib/internal.h b/lib/internal.h
index e900eca3..73fd24c0 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -242,6 +242,7 @@  struct nbd_handle {
       union {
         struct nbd_simple_reply simple;
         struct nbd_structured_reply structured;
+        struct nbd_extended_reply extended;
       } hdr;
       union {
         struct nbd_structured_reply_offset_data offset_data;
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index 6f187f14..da9894c6 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -45,14 +45,20 @@  structured_reply_in_bounds (uint64_t offset, uint32_t length,

 STATE_MACHINE {
  REPLY.STRUCTURED_REPLY.START:
-  /* We've only read the simple_reply.  The structured_reply is longer,
-   * so read the remaining part.
+  /* If we have extended headers, we've already read the entire header.
+   * Otherwise, we've only read enough for a simple_reply; since structured
+   * replies are longer, read the remaining part.
    */
-  h->rbuf = &h->sbuf;
-  h->rbuf = (char *) h->rbuf + sizeof h->sbuf.reply.hdr.simple;
-  h->rlen = sizeof h->sbuf.reply.hdr.structured;
-  h->rlen -= sizeof h->sbuf.reply.hdr.simple;
-  SET_NEXT_STATE (%RECV_REMAINING);
+  if (h->extended_headers) {
+    assert (h->rbuf == sizeof h->sbuf.reply.hdr.extended + (char*) &h->sbuf);
+    SET_NEXT_STATE (%CHECK);
+  }
+  else {
+    assert (h->rbuf == sizeof h->sbuf.reply.hdr.simple + (char*) &h->sbuf);
+    h->rlen = sizeof h->sbuf.reply.hdr.structured -
+      sizeof h->sbuf.reply.hdr.simple;
+    SET_NEXT_STATE (%RECV_REMAINING);
+  }
   return 0;

  REPLY.STRUCTURED_REPLY.RECV_REMAINING:
@@ -69,11 +75,18 @@   REPLY.STRUCTURED_REPLY.RECV_REMAINING:
  REPLY.STRUCTURED_REPLY.CHECK:
   struct command *cmd = h->reply_cmd;
   uint16_t flags, type;
-  uint32_t length;
+  uint64_t length;
+  uint64_t offset = -1;

+  assert (cmd);
   flags = be16toh (h->sbuf.reply.hdr.structured.flags);
   type = be16toh (h->sbuf.reply.hdr.structured.type);
-  length = be32toh (h->sbuf.reply.hdr.structured.length);
+  if (h->extended_headers) {
+    length = be64toh (h->sbuf.reply.hdr.extended.length);
+    offset = be64toh (h->sbuf.reply.hdr.extended.offset);
+  }
+  else
+    length = be32toh (h->sbuf.reply.hdr.structured.length);

   /* Reject a server that replies with too much information, but don't
    * reject a single structured reply to NBD_CMD_READ on the largest
@@ -83,13 +96,18 @@   REPLY.STRUCTURED_REPLY.CHECK:
    * not worth keeping the connection alive.
    */
   if (length > MAX_REQUEST_SIZE + sizeof h->sbuf.reply.payload.offset_data) {
-    set_error (0, "invalid server reply length %" PRIu32, length);
+    set_error (0, "invalid server reply length %" PRIu64, length);
     SET_NEXT_STATE (%.DEAD);
     return 0;
   }
+  /* For convenience, we now normalize extended replies into compact,
+   * doable since we validated length fits in 32 bits.
+   */
+  h->sbuf.reply.hdr.structured.length = length;

   /* Skip an unexpected structured reply, including to an unknown cookie. */
-  if (cmd == NULL || !h->structured_replies)
+  if (cmd == NULL || !h->structured_replies ||
+      (h->extended_headers && offset != cmd->offset))
     goto resync;

   switch (type) {
@@ -168,7 +186,7 @@   REPLY.STRUCTURED_REPLY.RECV_ERROR:
     SET_NEXT_STATE (%.READY);
     return 0;
   case 0:
-    length = be32toh (h->sbuf.reply.hdr.structured.length);
+    length = h->sbuf.reply.hdr.structured.length; /* normalized in CHECK */
     assert (length >= sizeof h->sbuf.reply.payload.error.error.error);
     assert (cmd);

@@ -207,7 +225,7 @@   REPLY.STRUCTURED_REPLY.RECV_ERROR_MESSAGE:
     SET_NEXT_STATE (%.READY);
     return 0;
   case 0:
-    length = be32toh (h->sbuf.reply.hdr.structured.length);
+    length = h->sbuf.reply.hdr.structured.length; /* normalized in CHECK */
     msglen = be16toh (h->sbuf.reply.payload.error.error.len);
     type = be16toh (h->sbuf.reply.hdr.structured.type);

@@ -307,7 +325,7 @@   REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA:
     SET_NEXT_STATE (%.READY);
     return 0;
   case 0:
-    length = be32toh (h->sbuf.reply.hdr.structured.length);
+    length = h->sbuf.reply.hdr.structured.length; /* normalized in CHECK */
     offset = be64toh (h->sbuf.reply.payload.offset_data.offset);

     assert (cmd); /* guaranteed by CHECK */
@@ -346,7 +364,7 @@   REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA_DATA:
     SET_NEXT_STATE (%.READY);
     return 0;
   case 0:
-    length = be32toh (h->sbuf.reply.hdr.structured.length);
+    length = h->sbuf.reply.hdr.structured.length; /* normalized in CHECK */
     offset = be64toh (h->sbuf.reply.payload.offset_data.offset);

     assert (cmd); /* guaranteed by CHECK */
@@ -426,7 +444,7 @@   REPLY.STRUCTURED_REPLY.RECV_BS_HEADER:
     SET_NEXT_STATE (%.READY);
     return 0;
   case 0:
-    length = be32toh (h->sbuf.reply.hdr.structured.length);
+    length = h->sbuf.reply.hdr.structured.length; /* normalized in CHECK */

     assert (cmd); /* guaranteed by CHECK */
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
@@ -460,7 +478,7 @@   REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES:
     SET_NEXT_STATE (%.READY);
     return 0;
   case 0:
-    length = be32toh (h->sbuf.reply.hdr.structured.length);
+    length = h->sbuf.reply.hdr.structured.length; /* normalized in CHECK */

     assert (cmd); /* guaranteed by CHECK */
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
diff --git a/generator/states-reply.c b/generator/states-reply.c
index 845973a4..dde23b39 100644
--- a/generator/states-reply.c
+++ b/generator/states-reply.c
@@ -62,15 +62,19 @@   REPLY.START:
    */
   ssize_t r;

-  /* We read all replies initially as if they are simple replies, but
-   * check the magic in CHECK_SIMPLE_OR_STRUCTURED_REPLY below.
-   * This works because the structured_reply header is larger.
+  /* With extended headers, there is only one size to read, so we can do it
+   * all in one syscall.  But for older structured replies, we don't know if
+   * we have a simple or structured reply until we read the magic number,
+   * requiring a two-part read with CHECK_SIMPLE_OR_STRUCTURED_REPLY below.
    */
   assert (h->reply_cmd == NULL);
   assert (h->rlen == 0);

   h->rbuf = &h->sbuf.reply.hdr;
-  h->rlen = sizeof h->sbuf.reply.hdr.simple;
+  if (h->extended_headers)
+    h->rlen = sizeof h->sbuf.reply.hdr.extended;
+  else
+    h->rlen = sizeof h->sbuf.reply.hdr.simple;

   r = h->sock->ops->recv (h, h->sock, h->rbuf, h->rlen);
   if (r == -1) {
@@ -116,15 +120,22 @@   REPLY.CHECK_SIMPLE_OR_STRUCTURED_REPLY:
   uint64_t cookie;

   magic = be32toh (h->sbuf.reply.hdr.simple.magic);
-  if (magic == NBD_SIMPLE_REPLY_MAGIC) {
+  switch (magic) {
+  case NBD_SIMPLE_REPLY_MAGIC:
+    if (h->extended_headers)
+      goto invalid;
     SET_NEXT_STATE (%SIMPLE_REPLY.START);
-  }
-  else if (magic == NBD_STRUCTURED_REPLY_MAGIC) {
+    break;
+  case NBD_STRUCTURED_REPLY_MAGIC:
+  case NBD_EXTENDED_REPLY_MAGIC:
+    if ((magic == NBD_STRUCTURED_REPLY_MAGIC) == h->extended_headers)
+      goto invalid;
     SET_NEXT_STATE (%STRUCTURED_REPLY.START);
-  }
-  else {
+    break;
+  default:
+  invalid:
     SET_NEXT_STATE (%.DEAD); /* We've probably lost synchronization. */
-    set_error (0, "invalid reply magic 0x%" PRIx32, magic);
+    set_error (0, "invalid or unexpected reply magic 0x%" PRIx32, magic);
     return 0;
   }