diff mbox series

[libnbd,v2,23/23] RFC: pread: Accept 64-bit holes

Message ID 20221114225158.2186742-24-eblake@redhat.com (mailing list archive)
State New, archived
Headers show
Series libnbd 64-bit NBD extensions | expand

Commit Message

Eric Blake Nov. 14, 2022, 10:51 p.m. UTC
Even though we don't currently allow the user to request NBD_CMD_READ
with more than 64M (and even if we did, our API signature caps us at
SIZE_MAX, which is 32 bits on a 32-bit machine), upstream NBD commit
XXX[*] states that for symmetry with 64-bit requests, extended header
clients must be prepared for a server response with a 64-bit hole,
even if the client never makes a read request that large.  Note that
we don't have to change the signature of the callback for
nbd_pread_structured; nor is it worth adding a 64-bit counterpart to
LIBNBD_READ_HOLE, because it is unlikely that a user callback will
ever need to distinguish between which size was sent over the wire,
when the value is always less than 32 bits.  Also note that the recent
NBD spec changes to add 64-bits did state that servers may allow
clients to request a read of larger than the max block size, but if
such read is not rejected with EOVERFLOW or EINVAL, then the reply
will be divided into chunks so that no chunk sends more than a max
block size payload.

While we cannot guarantee which size structured reply the server will
use, it is easy enough to handle both sizes, but tag the read with
EPROTO for a non-compliant server that sends wide replies when
extended headers were not negotiated.  A more likely reason that a
server may choose to send 64-bit hole chunks even for a 32-bit hole is
because the extended hole payload has nicer power-of-2 sizing.

---

[*] FIXME: Update this with the actual commit id, if upstream NBD even
goes with this option.
---
 lib/internal.h                      |  1 +
 lib/nbd-protocol.h                  |  6 +++++
 generator/states-reply-structured.c | 36 ++++++++++++++++++++++++-----
 3 files changed, 37 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/lib/internal.h b/lib/internal.h
index ac8d99c4..64d1941c 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -250,6 +250,7 @@  struct nbd_handle {
       union {
         struct nbd_structured_reply_offset_data offset_data;
         struct nbd_structured_reply_offset_hole offset_hole;
+        struct nbd_structured_reply_offset_hole_ext offset_hole_ext;
         struct nbd_structured_reply_block_status_hdr bs_hdr;
         struct nbd_structured_reply_block_status_ext_hdr bs_ext_hdr;
         struct {
diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
index 2d1fabd0..2d1a3202 100644
--- a/lib/nbd-protocol.h
+++ b/lib/nbd-protocol.h
@@ -247,6 +247,11 @@  struct nbd_structured_reply_offset_hole {
   uint32_t length;              /* Length of hole. */
 } NBD_ATTRIBUTE_PACKED;

+struct nbd_structured_reply_offset_hole_ext {
+  uint64_t offset;
+  uint64_t length;              /* Length of hole. */
+} NBD_ATTRIBUTE_PACKED;
+
 /* NBD_REPLY_TYPE_BLOCK_STATUS block descriptor. */
 struct nbd_block_descriptor {
   uint32_t length;              /* length of block */
@@ -292,6 +297,7 @@  struct nbd_structured_reply_error {
 #define NBD_REPLY_TYPE_NONE             0
 #define NBD_REPLY_TYPE_OFFSET_DATA      1
 #define NBD_REPLY_TYPE_OFFSET_HOLE      2
+#define NBD_REPLY_TYPE_OFFSET_HOLE_EXT  3
 #define NBD_REPLY_TYPE_BLOCK_STATUS     5
 #define NBD_REPLY_TYPE_BLOCK_STATUS_EXT 6
 #define NBD_REPLY_TYPE_ERROR            NBD_REPLY_TYPE_ERR (1)
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index 7e313b5a..e338bf74 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -28,15 +28,16 @@ 
  * requesting command.
  */
 static bool
-structured_reply_in_bounds (uint64_t offset, uint32_t length,
+structured_reply_in_bounds (uint64_t offset, uint64_t length,
                             const struct command *cmd)
 {
   if (offset < cmd->offset ||
       offset >= cmd->offset + cmd->count ||
-      offset + length > cmd->offset + cmd->count) {
+      length > cmd->offset + cmd->count ||
+      offset > cmd->offset + cmd->count - length) {
     set_error (0, "range of structured reply is out of bounds, "
                "offset=%" PRIu64 ", cmd->offset=%" PRIu64 ", "
-               "length=%" PRIu32 ", cmd->count=%" PRIu64 ": "
+               "length=%" PRIu64 ", cmd->count=%" PRIu64 ": "
                "this is likely to be a bug in the NBD server",
                offset, cmd->offset, length, cmd->count);
     return false;
@@ -141,6 +142,21 @@   REPLY.STRUCTURED_REPLY.CHECK:
     SET_NEXT_STATE (%RECV_OFFSET_HOLE);
     break;

+  case NBD_REPLY_TYPE_OFFSET_HOLE_EXT:
+    if (cmd->type != NBD_CMD_READ ||
+        length != sizeof h->sbuf.reply.payload.offset_hole_ext)
+      goto resync;
+    if (!h->extended_headers) {
+      debug (h, "unexpected 64-bit hole without extended headers, "
+             "this is probably a server bug");
+      if (cmd->error == 0)
+        cmd->error = EPROTO;
+    }
+    h->rbuf = &h->sbuf.reply.payload.offset_hole_ext;
+    h->rlen = sizeof h->sbuf.reply.payload.offset_hole_ext;
+    SET_NEXT_STATE (%RECV_OFFSET_HOLE);
+    break;
+
   case NBD_REPLY_TYPE_BLOCK_STATUS:
     if (cmd->type != NBD_CMD_BLOCK_STATUS ||
         length < 12 || ((length-4) & 7) != 0)
@@ -406,7 +422,8 @@   REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA_DATA:
  REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE:
   struct command *cmd = h->reply_cmd;
   uint64_t offset;
-  uint32_t length;
+  uint64_t length;
+  uint16_t type;

   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return 0;
@@ -416,10 +433,14 @@   REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE:
     return 0;
   case 0:
     offset = be64toh (h->sbuf.reply.payload.offset_hole.offset);
-    length = be32toh (h->sbuf.reply.payload.offset_hole.length);
+    type = be16toh (h->sbuf.reply.hdr.structured.type);
+
+    if (type == NBD_REPLY_TYPE_OFFSET_HOLE)
+      length = be32toh (h->sbuf.reply.payload.offset_hole.length);
+    else
+      length = be64toh (h->sbuf.reply.payload.offset_hole_ext.length);

     assert (cmd); /* guaranteed by CHECK */
-
     assert (cmd->data && cmd->type == NBD_CMD_READ);

     /* Is the data within bounds? */
@@ -435,7 +456,10 @@   REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE:
     /* The spec states that 0-length requests are unspecified, but
      * 0-length replies are broken. Still, it's easy enough to support
      * them as an extension, and this works even when length == 0.
+     * Although length is 64 bits, the bounds check above ensures that
+     * it is no larger than the 64M cap we put on NBD_CMD_READ.
      */
+    assert (length <= SIZE_MAX);
     if (!cmd->initialized)
       memset ((char *) cmd->data + offset, 0, length);
     if (CALLBACK_IS_NOT_NULL (cmd->cb.fn.chunk)) {