diff mbox series

[libnbd,08/13] block_status: Track 64-bit extents internally

Message ID 20211203231741.3901263-9-eblake@redhat.com (mailing list archive)
State New, archived
Headers show
Series spec: Add NBD_OPT_EXTENDED_HEADERS | expand

Commit Message

Eric Blake Dec. 3, 2021, 11:17 p.m. UTC
When extended headers are in use, the server can send us 64-bit
extents, even for a 32-bit query (if the server knows the entire image
is data, for example).  For maximum flexibility, we are thus better
off storing 64-bit lengths internally, even if we have to convert it
back to 32-bit lengths when invoking the user's 32-bit callback.  The
next patch will then add a new API for letting the user access the
full 64-bit extent information.  The goal is to let both APIs work all
the time, regardless of the size extents that the server actually
answered with.

Note that when using the old nbd_block_status() API with a server that
lacks extended headers, we now add a double-conversion speed penalty
(converting the server's 32-bit answer into 64-bit internally and back
to 32-bit for the callback).  But the speed penalty will not be a
problem for applications using the new nbd_block_status_64() API (we
have to give a 64-bit answer no matter what the server answered), and
ideally the situation will become less common as more servers learn
extended headers.  So for now I chose to unconditionally use a 64-bit
internal representation; but if it turns out to have noticeable
degredation, we could tweak things to conditionally retain 32-bit
internal representation for servers lacking extended headers at the
expense of more code maintenance.

One of the trickier aspects of this patch is auditing that both the
user's extent and our malloc'd shim get cleaned up once on all
possible paths, so that there is neither a leak nor a double free.
---
 lib/internal.h                      |  7 +++-
 generator/states-reply-structured.c | 31 ++++++++++-----
 lib/handle.c                        |  4 +-
 lib/rw.c                            | 59 ++++++++++++++++++++++++++++-
 4 files changed, 85 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/lib/internal.h b/lib/internal.h
index 06f3a65c..4800df83 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -75,7 +75,7 @@  struct export {

 struct command_cb {
   union {
-    nbd_extent_callback extent;
+    nbd_extent64_callback extent;
     nbd_chunk_callback chunk;
     nbd_list_callback list;
     nbd_context_callback context;
@@ -286,7 +286,10 @@  struct nbd_handle {

   /* When receiving block status, this is used. */
   uint32_t bs_contextid;
-  uint32_t *bs_entries;
+  union {
+    nbd_extent *normal; /* Our 64-bit preferred internal form */
+    uint32_t *narrow;   /* 32-bit form of NBD_REPLY_TYPE_BLOCK_STATUS */
+  } bs_entries;

   /* Commands which are waiting to be issued [meaning the request
    * packet is sent to the server].  This is used as a simple linked
diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c
index a3e0e2ac..71c761e9 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -494,6 +494,7 @@  STATE_MACHINE {
  REPLY.STRUCTURED_REPLY.RECV_BS_CONTEXTID:
   struct command *cmd = h->reply_cmd;
   uint32_t length;
+  uint32_t count;

   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return 0;
@@ -508,15 +509,19 @@  STATE_MACHINE {
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
     assert (length >= 12);
     length -= sizeof h->bs_contextid;
+    count = length / (2 * sizeof (uint32_t));

-    free (h->bs_entries);
-    h->bs_entries = malloc (length);
-    if (h->bs_entries == NULL) {
+    /* Read raw data into a subset of h->bs_entries, then expand it
+     * into place later later during byte-swapping.
+     */
+    free (h->bs_entries.normal);
+    h->bs_entries.normal = malloc (count * sizeof *h->bs_entries.normal);
+    if (h->bs_entries.normal == NULL) {
       SET_NEXT_STATE (%.DEAD);
       set_error (errno, "malloc");
       return 0;
     }
-    h->rbuf = h->bs_entries;
+    h->rbuf = h->bs_entries.narrow;
     h->rlen = length;
     SET_NEXT_STATE (%RECV_BS_ENTRIES);
   }
@@ -528,6 +533,7 @@  STATE_MACHINE {
   uint32_t count;
   size_t i;
   uint32_t context_id;
+  uint32_t *raw;
   struct meta_context *meta_context;

   switch (recv_into_rbuf (h)) {
@@ -542,15 +548,20 @@  STATE_MACHINE {
     assert (cmd); /* guaranteed by CHECK */
     assert (cmd->type == NBD_CMD_BLOCK_STATUS);
     assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent));
-    assert (h->bs_entries);
+    assert (h->bs_entries.normal);
     assert (length >= 12);
-    count = (length - sizeof h->bs_contextid) / sizeof *h->bs_entries;
+    count = (length - sizeof h->bs_contextid) / (2 * sizeof (uint32_t));

     /* Need to byte-swap the entries returned, but apart from that we
-     * don't validate them.
+     * don't validate them.  Reverse order is essential, since we are
+     * expanding in-place from narrow to wider type.
      */
-    for (i = 0; i < count; ++i)
-      h->bs_entries[i] = be32toh (h->bs_entries[i]);
+    raw = h->bs_entries.narrow;
+    for (i = count; i > 0; ) {
+      --i;
+      h->bs_entries.normal[i].flags = be32toh (raw[i * 2 + 1]);
+      h->bs_entries.normal[i].length = be32toh (raw[i * 2]);
+    }

     /* Look up the context ID. */
     context_id = be32toh (h->bs_contextid);
@@ -566,7 +577,7 @@  STATE_MACHINE {

       if (CALL_CALLBACK (cmd->cb.fn.extent,
                          meta_context->name, cmd->offset,
-                         h->bs_entries, count,
+                         h->bs_entries.normal, count,
                          &error) == -1)
         if (cmd->error == 0)
           cmd->error = error ? error : EPROTO;
diff --git a/lib/handle.c b/lib/handle.c
index cbb37e89..74fe87ec 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -1,5 +1,5 @@ 
 /* NBD client library in userspace
- * Copyright (C) 2013-2020 Red Hat Inc.
+ * Copyright (C) 2013-2021 Red Hat Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -123,7 +123,7 @@  nbd_close (struct nbd_handle *h)
   /* Free user callbacks first. */
   nbd_unlocked_clear_debug_callback (h);

-  free (h->bs_entries);
+  free (h->bs_entries.normal);
   nbd_internal_reset_size_and_flags (h);
   nbd_internal_free_option (h);
   free_cmd_list (h->cmds_to_issue);
diff --git a/lib/rw.c b/lib/rw.c
index 16c2e848..f36f4e15 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -42,6 +42,50 @@  wait_for_command (struct nbd_handle *h, int64_t cookie)
   return r == -1 ? -1 : 0;
 }

+/* Convert from 64-bit to 32-bit extent callback. */
+static int
+nbd_convert_extent (void *data, const char *metacontext, uint64_t offset,
+                    nbd_extent *entries, size_t nr_entries, int *error)
+{
+  nbd_extent_callback *cb = data;
+  uint32_t *array = malloc (nr_entries * 2 * sizeof *array);
+  size_t i;
+  int ret;
+
+  if (array == NULL) {
+    set_error (*error = errno, "malloc");
+    return -1;
+  }
+
+  for (i = 0; i < nr_entries; i++) {
+    array[i * 2] = entries[i].length;
+    array[i * 2 + 1] = entries[i].flags;
+    /* If an extent is larger than 32 bits, silently truncate the rest
+     * of the server's response.  Technically, such a server was
+     * non-compliant if the client did not negotiate extended headers,
+     * but it is easier to let the caller make progress than to make
+     * the call fail.  Rather than track the connection's alignment,
+     * just blindly truncate the large extent to 4G-64M.
+     */
+    if (entries[i].length > UINT32_MAX) {
+      array[i++ * 2] = -MAX_REQUEST_SIZE;
+      break;
+    }
+  }
+
+  ret = CALL_CALLBACK (*cb, metacontext, offset, array, i * 2, error);
+  free (array);
+  return ret;
+}
+
+static void
+nbd_convert_extent_free (void *data)
+{
+  nbd_extent_callback *cb = data;
+  FREE_CALLBACK (*cb);
+  free (cb);
+}
+
 /* Issue a read command and wait for the reply. */
 int
 nbd_unlocked_pread (struct nbd_handle *h, void *buf,
@@ -469,12 +513,23 @@  nbd_unlocked_aio_block_status (struct nbd_handle *h,
                                nbd_completion_callback *completion,
                                uint32_t flags)
 {
-  struct command_cb cb = { .fn.extent = *extent,
+  nbd_extent_callback *shim = malloc (sizeof *shim);
+  struct command_cb cb = { .fn.extent.callback = nbd_convert_extent,
+                           .fn.extent.user_data = shim,
+                           .fn.extent.free = nbd_convert_extent_free,
                            .completion = *completion };

+  if (shim == NULL) {
+    set_error (errno, "malloc");
+    return -1;
+  }
+  *shim = *extent;
+  SET_CALLBACK_TO_NULL (*extent);
+
   if (h->strict & LIBNBD_STRICT_COMMANDS) {
     if (!h->structured_replies) {
       set_error (ENOTSUP, "server does not support structured replies");
+      FREE_CALLBACK (cb.fn.extent);
       return -1;
     }

@@ -482,11 +537,11 @@  nbd_unlocked_aio_block_status (struct nbd_handle *h,
       set_error (ENOTSUP, "did not negotiate any metadata contexts, "
                  "either you did not call nbd_add_meta_context before "
                  "connecting or the server does not support it");
+      FREE_CALLBACK (cb.fn.extent);
       return -1;
     }
   }

-  SET_CALLBACK_TO_NULL (*extent);
   SET_CALLBACK_TO_NULL (*completion);
   return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
                                       count, EINVAL, NULL, &cb);