diff mbox series

[libnbd,v2,22/23] api: Add nbd_[aio_]block_status_filter()

Message ID 20221114225158.2186742-23-eblake@redhat.com (mailing list archive)
State New, archived
Headers show
Series libnbd 64-bit NBD extensions | expand

Commit Message

Eric Blake Nov. 14, 2022, 10:51 p.m. UTC
As part of extending NBD to support 64-bit lengths, the protocol also
added an option for servers to allow clients to request filtered
responses to NBD_CMD_BLOCK_STATUS when more than one meta-context is
negotiated (see NBD commit XXX[*]).  At the same time as this patch,
qemu-nbd was taught to support and advertise this feature as a server,
but does not utilize it as a client (qemu doesn't yet need to connect
to multiple contexts at once).  Thus, adding generic client support
and enhancing the interop/ test in libnbd is needed to prove that the
feature is viable and worth standardizing.

---
[*] FIXME with actual commit id
---
 lib/internal.h                   |   5 +-
 generator/API.ml                 |  71 +++++++++++++++--
 generator/states-issue-command.c |   4 +-
 lib/aio.c                        |   7 +-
 lib/rw.c                         | 127 ++++++++++++++++++++++++++++++-
 interop/block-status-payload.c   | 117 +++++++++++++++++++++++++++-
 interop/block-status-payload.sh  |  14 +++-
 info/info-can.sh                 |   3 +
 8 files changed, 336 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/lib/internal.h b/lib/internal.h
index 1abb21cb..ac8d99c4 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -73,6 +73,8 @@  struct meta_context {
 };
 DEFINE_VECTOR_TYPE(meta_vector, struct meta_context);

+DEFINE_VECTOR_TYPE(uint32_vector, uint32_t);
+
 struct export {
   char *name;
   char *description;
@@ -379,7 +381,8 @@  struct command {
   uint64_t cookie;
   uint64_t offset;
   uint64_t count;
-  void *data; /* Buffer for read/write */
+  void *data; /* Buffer for read/write, uint32_vector* for status payload */
+  uint32_vector *ids; /* For block status with payload */
   struct command_cb cb;
   bool initialized; /* For read, true if getting a hole may skip memset */
   uint32_t data_seen; /* For read, cumulative size of data chunks seen */
diff --git a/generator/API.ml b/generator/API.ml
index bbf7c0bb..6bf67de0 100644
--- a/generator/API.ml
+++ b/generator/API.ml
@@ -2287,12 +2287,13 @@    "can_block_status_payload", {
     longdesc = "\
 Returns true if the server supports the use of the
 C<LIBNBD_CMD_FLAG_PAYLOAD_LEN> flag to allow filtering of the
-block status command.  Returns
+block status command (see L<nbd_block_status_filter(3)>).  Returns
 false if the server does not.  Note that this will never return
 true if L<nbd_get_extended_headers_negotiated(3)> is false."
 ^ non_blocking_test_call_description;
     see_also = [SectionLink "Flag calls"; Link "opt_info";
-                Link "get_extended_headers_negotiated"];
+                Link "get_extended_headers_negotiated";
+                Link "block_status_filter"];
     example = Some "examples/server-flags.c";
   };

@@ -2361,6 +2362,10 @@    "can_meta_context", {
 meta contexts were requested but there is a missing or failed
 attempt at NBD_OPT_SET_META_CONTEXT during option negotiation.

+If the server supports block status filtering (see
+L<nbd_can_block_status_payload(3)>), this function must return
+true for any filter name passed to L<nbd_block_status_filter(3)>.
+
 The single parameter is the name of the metadata context,
 for example C<LIBNBD_CONTEXT_BASE_ALLOCATION>.
 B<E<lt>libnbd.hE<gt>> includes defined constants for well-known
@@ -2893,9 +2898,12 @@    "block_status_64", {
 information about blocks beginning from the specified
 offset to be returned. The C<count> parameter is a hint: the
 server may choose to return less status, or the final block
-may extend beyond the requested range. If multiple contexts
+may extend beyond the requested range. When multiple contexts
 are supported, the number of blocks and cumulative length
-of those blocks need not be identical between contexts.
+of those blocks need not be identical between contexts; this
+command generally returns the status of all negotiated contexts,
+while some servers also support a filtered request (see
+L<nbd_can_block_status_payload(3)>, L<nbd_block_status_filter(3)>).

 Note that not all servers can support a C<count> of 4GiB or larger;
 L<nbd_get_extended_headers_negotiated(3)> indicates which servers
@@ -2945,11 +2953,38 @@    "block_status_64", {
 does not exceed C<count> bytes; however, libnbd does not
 validate that the server obeyed the flag."
 ^ strict_call_description;
-    see_also = [Link "block_status";
+    see_also = [Link "block_status"; Link "block_status_filter";
                 Link "add_meta_context"; Link "can_meta_context";
                 Link "aio_block_status_64"; Link "set_strict_mode"];
   };

+  "block_status_filter", {
+    default_call with
+    args = [ UInt64 "count"; UInt64 "offset"; StringList "contexts";
+             Closure extent64_closure ];
+    optargs = [ OFlags ("flags", cmd_flags, Some ["REQ_ONE"; "PAYLOAD_LEN"]) ];
+    ret = RErr;
+    permitted_states = [ Connected ];
+    shortdesc = "send filtered block status command, with 64-bit callback";
+    longdesc = "\
+Issue a filtered block status command to the NBD server.  If
+supported by the server (see L<nbd_can_block_status_payload(3)>),
+this causes metadata context information about blocks beginning
+from the specified offset to be returned, and with the result
+limited to just the contexts specified in C<contexts>.  Note that
+all strings in C<contexts> must be supported by
+L<nbd_can_meta_context(3)>.
+
+All other parameters to this function have the same semantics
+as in L<nbd_block_status_64(3)>; except that for convenience,
+the C<flags> parameter may additionally contain or omit
+C<LIBNBD_CMD_FLAG_PAYLOAD_LEN>."
+^ strict_call_description;
+    see_also = [Link "block_status_64";
+                Link "can_block_status_payload"; Link "can_meta_context";
+                Link "aio_block_status_filter"; Link "set_strict_mode"];
+  };
+
   "poll", {
     default_call with
     args = [ Int "timeout" ]; ret = RInt;
@@ -3619,6 +3654,30 @@    "aio_block_status_64", {
                 Link "set_strict_mode"];
   };

+  "aio_block_status_filter", {
+    default_call with
+    args = [ UInt64 "count"; UInt64 "offset"; StringList "contexts";
+             Closure extent64_closure ];
+    optargs = [ OClosure completion_closure;
+                OFlags ("flags", cmd_flags, Some ["REQ_ONE"; "PAYLOAD_LEN"]) ];
+    ret = RCookie;
+    permitted_states = [ Connected ];
+    shortdesc = "send filtered block status command to the NBD server";
+    longdesc = "\
+Send a filtered block status command to the NBD server.
+
+To check if the command completed, call L<nbd_aio_command_completed(3)>.
+Or supply the optional C<completion_callback> which will be invoked
+as described in L<libnbd(3)/Completion callbacks>.
+
+Other parameters behave as documented in L<nbd_block_status_filter(3)>."
+^ strict_call_description;
+    see_also = [SectionLink "Issuing asynchronous commands";
+                Link "aio_block_status_64"; Link "block_status_filter";
+                Link "can_meta_context"; Link "can_block_status_payload";
+                Link "set_strict_mode"];
+  };
+
   "aio_get_fd", {
     default_call with
     args = []; ret = RFd;
@@ -4149,6 +4208,8 @@  let first_version =
   "opt_extended_headers", (1, 16);
   "aio_opt_extended_headers", (1, 16);
   "can_block_status_payload", (1, 16);
+  "block_status_filter", (1, 16);
+  "aio_block_status_filter", (1, 16);

   (* These calls are proposed for a future version of libnbd, but
    * have not been added to any released version so far.
diff --git a/generator/states-issue-command.c b/generator/states-issue-command.c
index feea2672..da898aef 100644
--- a/generator/states-issue-command.c
+++ b/generator/states-issue-command.c
@@ -84,7 +84,9 @@   ISSUE_COMMAND.PREPARE_WRITE_PAYLOAD:
   assert (h->cmds_to_issue != NULL);
   cmd = h->cmds_to_issue;
   assert (cmd->cookie == be64toh (h->req.compact.handle));
-  if (cmd->type == NBD_CMD_WRITE) {
+  if (cmd->type == NBD_CMD_WRITE ||
+      (h->extended_headers && cmd->type == NBD_CMD_BLOCK_STATUS &&
+       cmd->flags & NBD_CMD_FLAG_PAYLOAD_LEN)) {
     h->wbuf = cmd->data;
     h->wlen = cmd->count;
     if (cmd->next && cmd->count < 64 * 1024)
diff --git a/lib/aio.c b/lib/aio.c
index 1188a328..fa142881 100644
--- a/lib/aio.c
+++ b/lib/aio.c
@@ -32,8 +32,13 @@  void
 nbd_internal_retire_and_free_command (struct command *cmd)
 {
   /* Free the callbacks. */
-  if (cmd->type == NBD_CMD_BLOCK_STATUS)
+  if (cmd->type == NBD_CMD_BLOCK_STATUS) {
+    if (cmd->ids) {
+      uint32_vector_reset (cmd->ids);
+      free (cmd->ids);
+    }
     FREE_CALLBACK (cmd->cb.fn.extent);
+  }
   if (cmd->type == NBD_CMD_READ)
     FREE_CALLBACK (cmd->cb.fn.chunk);
   FREE_CALLBACK (cmd->cb.completion);
diff --git a/lib/rw.c b/lib/rw.c
index 26533836..310c1285 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -242,6 +242,26 @@  nbd_unlocked_block_status_64 (struct nbd_handle *h,
   return wait_for_command (h, cookie);
 }

+/* Issue a filtered block status command and wait for the reply. */
+int
+nbd_unlocked_block_status_filter (struct nbd_handle *h,
+                                  uint64_t count, uint64_t offset,
+                                  char **filter,
+                                  nbd_extent64_callback *extent64,
+                                  uint32_t flags)
+{
+  int64_t cookie;
+  nbd_completion_callback c = NBD_NULL_COMPLETION;
+
+  cookie = nbd_unlocked_aio_block_status_filter (h, count, offset, filter,
+                                                 extent64, &c, flags);
+  if (cookie == -1)
+    return -1;
+
+  assert (CALLBACK_IS_NULL (*extent64));
+  return wait_for_command (h, cookie);
+}
+
 /* count_err represents the errno to return if bounds check fail */
 int64_t
 nbd_internal_command_common (struct nbd_handle *h,
@@ -250,6 +270,7 @@  nbd_internal_command_common (struct nbd_handle *h,
                              void *data, struct command_cb *cb)
 {
   struct command *cmd;
+  uint32_vector *ids = NULL;

   if (h->disconnect_request) {
       set_error (EINVAL, "cannot request more commands after NBD_CMD_DISC");
@@ -297,10 +318,23 @@  nbd_internal_command_common (struct nbd_handle *h,
     }
     break;

+  case NBD_CMD_BLOCK_STATUS:
+    if (data) {
+      ids = data;
+      count = ids->len * sizeof (uint32_t);
+      data = ids->ptr;
+      if (count > MAX_REQUEST_SIZE ||
+          (h->strict & LIBNBD_STRICT_PAYLOAD && count > h->payload_maximum)) {
+        set_error (ERANGE, "filter set too large");
+        goto err;
+      }
+      break;
+    }
+    /* fallthrough */
+  default:
     /* Other commands are limited by the 32 bit field in the command
      * structure on the wire, unless extended headers were negotiated.
      */
-  default:
     if (!h->extended_headers && count > UINT32_MAX) {
       set_error (ERANGE, "request too large: maximum request size is %" PRIu32,
                  UINT32_MAX);
@@ -320,6 +354,7 @@  nbd_internal_command_common (struct nbd_handle *h,
   cmd->offset = offset;
   cmd->count = count;
   cmd->data = data;
+  cmd->ids = ids;
   if (cb)
     cmd->cb = *cb;

@@ -364,8 +399,13 @@  nbd_internal_command_common (struct nbd_handle *h,
  err:
   /* Since we did not queue the command, we must free the callbacks. */
   if (cb) {
-    if (type == NBD_CMD_BLOCK_STATUS)
+    if (type == NBD_CMD_BLOCK_STATUS) {
+      if (ids) {
+        uint32_vector_reset (ids);
+        free (ids);
+      }
       FREE_CALLBACK (cb->fn.extent);
+    }
     if (type == NBD_CMD_READ)
       FREE_CALLBACK (cb->fn.chunk);
     FREE_CALLBACK (cb->completion);
@@ -609,3 +649,86 @@  nbd_unlocked_aio_block_status_64 (struct nbd_handle *h,
   return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
                                       count, EINVAL, NULL, &cb);
 }
+
+int64_t
+nbd_unlocked_aio_block_status_filter (struct nbd_handle *h,
+                                      uint64_t count, uint64_t offset,
+                                      char **filter,
+                                      nbd_extent64_callback *extent64,
+                                      nbd_completion_callback *completion,
+                                      uint32_t flags)
+{
+  struct command_cb cb = { .fn.extent = *extent64,
+                           .completion = *completion };
+  uint32_vector *ids;
+  char *name;
+  size_t i;
+
+  /* Because this affects wire format, it is more convenient to manage
+   * PAYLOAD_LEN by what was negotiated than to require the user to
+   * have to set it correctly.
+   */
+  if (!h->extended_headers) {
+    set_error (ENOTSUP, "server does not support extended headers");
+    return -1;
+  }
+  flags |= LIBNBD_CMD_FLAG_PAYLOAD_LEN;
+
+  if (h->strict & LIBNBD_STRICT_COMMANDS) {
+    if (nbd_unlocked_can_block_status_payload (h) != 1) {
+      set_error (EINVAL,
+                 "server does not support the block status payload flag");
+      return -1;
+    }
+
+    if (!h->meta_valid || h->meta_contexts.len == 0) {
+      set_error (ENOTSUP, "did not negotiate any metadata contexts, "
+                 "either you did not call nbd_add_meta_context before "
+                 "connecting or the server does not support it");
+      return -1;
+    }
+  }
+
+  ids = calloc (1, sizeof *ids);
+  if (ids == NULL) {
+    set_error (errno, "calloc");
+    return -1;
+  }
+  if (uint32_vector_append (ids, htobe32 (count >> 32)) == -1 ||
+      uint32_vector_append (ids, htobe32 (count)) == -1) {
+    set_error (errno, "realloc");
+    goto fail;
+  }
+
+  /* O(n^2) search - hopefully filter and negotiated contexts are both small */
+  for ( ; (name = *filter) != NULL; filter++) {
+    if (!h->meta_valid) {
+      set_error (EINVAL, "context %s not negotiated", name);
+      goto fail;
+    }
+    for (i = 0; i < h->meta_contexts.len; i++) {
+      struct meta_context *meta = &h->meta_contexts.ptr[i];
+      if (strcmp (name, meta->name) == 0) {
+        if (uint32_vector_append (ids, htobe32 (meta->context_id)) == -1) {
+          set_error (errno, "realloc");
+          goto fail;
+        }
+        break;
+      }
+    }
+    if (i == h->meta_contexts.len) {
+      set_error (EINVAL, "context %s not negotiated", name);
+      goto fail;
+    }
+  }
+
+  SET_CALLBACK_TO_NULL (*extent64);
+  SET_CALLBACK_TO_NULL (*completion);
+  return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
+                                      count, EINVAL, ids, &cb);
+
+ fail:
+  uint32_vector_reset (ids);
+  free (ids);
+  return -1;
+}
diff --git a/interop/block-status-payload.c b/interop/block-status-payload.c
index cdb0de7c..0c0348ba 100644
--- a/interop/block-status-payload.c
+++ b/interop/block-status-payload.c
@@ -54,11 +54,26 @@  cb (void *opaque, const char *metacontext, uint64_t offset,
   return 0;
 }

+static char **
+list (unsigned int use)
+{
+  static const char *array[ARRAY_SIZE (contexts) + 1];
+  size_t i, j;
+
+  assert (use < 1 << ARRAY_SIZE (contexts));
+  for (i = j = 0; i < ARRAY_SIZE (contexts); i++)
+    if (use & (1 << i))
+      array[j++] = contexts[i];
+  array[j] = NULL;
+  return (char **) array;
+}
+
 int
 main (int argc, char *argv[])
 {
   struct nbd_handle *nbd;
   int64_t exportsize;
+  uint64_t bytes_sent;
   unsigned int seen;
   size_t i;
   int r;
@@ -114,7 +129,107 @@  main (int argc, char *argv[])
   }
   assert (seen == 0xf);

-  /* FIXME: Test filtered calls once the API is added */
+  /* Filtering with all contexts listed, same effect as unfiltered call */
+  seen = 0;
+  if (nbd_block_status_filter (nbd, exportsize, 0, list (0xf),
+                               (nbd_extent64_callback) { .callback = cb,
+                                                         .user_data = &seen },
+                               0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (seen == 0xf);
+
+  /* Filtering with just two out of four contexts; test optional flag */
+  seen = 0;
+  if (nbd_block_status_filter (nbd, exportsize, 0, list (0x5),
+                               (nbd_extent64_callback) { .callback = cb,
+                                                         .user_data = &seen },
+                               LIBNBD_CMD_FLAG_PAYLOAD_LEN) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (seen == 0x5);
+
+  /* Filtering with one context, near end of file (to make sure the
+   * payload length isn't confused with the effect length)
+   */
+  seen = 0;
+  if (nbd_block_status_filter (nbd, 1, exportsize - 1, list (0x2),
+                               (nbd_extent64_callback) { .callback = cb,
+                                                         .user_data = &seen },
+                               0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (seen == 0x2);
+
+  /* Filtering with no contexts - pointless, so qemu rejects it */
+  bytes_sent = nbd_stats_bytes_sent (nbd);
+  seen = 0;
+  if (nbd_block_status_filter (nbd, exportsize, 0, list (0x0),
+                               (nbd_extent64_callback) { .callback = cb,
+                                                         .user_data = &seen },
+                               0) != -1) {
+    fprintf (stderr, "expecting block status failure\n");
+    exit (EXIT_FAILURE);
+  }
+  assert (seen == 0x0);
+  if (nbd_get_errno () != EINVAL) {
+    fprintf (stderr, "expecting EINVAL after block status failure\n");
+    exit (EXIT_FAILURE);
+  }
+  if (nbd_stats_bytes_sent (nbd) <= bytes_sent) {
+    fprintf (stderr, "expecting server-side rejection of bad request\n");
+    exit (EXIT_FAILURE);
+  }
+
+  /* Giving unknown string triggers EINVAL from libnbd */
+  bytes_sent = nbd_stats_bytes_sent (nbd);
+  seen = 0;
+  {
+    const char *bogus[] = { "qemu:dirty-bitmap:bitmap2", NULL };
+    if (nbd_block_status_filter (nbd, exportsize, 0, (char **) bogus,
+                                 (nbd_extent64_callback) { .callback = cb,
+                                                           .user_data = &seen },
+                                 0) != -1) {
+      fprintf (stderr, "expecting block status failure\n");
+      exit (EXIT_FAILURE);
+    }
+  }
+  if (nbd_get_errno () != EINVAL) {
+    fprintf (stderr, "expecting EINVAL after block status failure\n");
+    exit (EXIT_FAILURE);
+  }
+  assert (seen == 0x0);
+  if (nbd_stats_bytes_sent (nbd) != bytes_sent) {
+    fprintf (stderr, "expecting client-side rejection of bad request\n");
+    exit (EXIT_FAILURE);
+  }
+
+  /* Giving same string twice triggers EINVAL from qemu */
+  seen = 0;
+  {
+    const char *dupes[] = { "base:allocation", "base:allocation", NULL };
+    if (nbd_block_status_filter (nbd, exportsize, 0, (char **) dupes,
+                                 (nbd_extent64_callback) { .callback = cb,
+                                                           .user_data = &seen },
+                                 0) != -1) {
+      fprintf (stderr, "expecting block status failure\n");
+      exit (EXIT_FAILURE);
+    }
+  }
+  if (nbd_get_errno () != EINVAL) {
+    fprintf (stderr, "expecting EINVAL after block status failure\n");
+    exit (EXIT_FAILURE);
+  }
+  assert (seen == 0x0);
+  if (nbd_stats_bytes_sent (nbd) <= bytes_sent) {
+    fprintf (stderr, "expecting server-side rejection of bad request\n");
+    exit (EXIT_FAILURE);
+  }
+
+  /* Done */
   if (nbd_shutdown (nbd, 0) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
     exit (EXIT_FAILURE);
diff --git a/interop/block-status-payload.sh b/interop/block-status-payload.sh
index a178583b..e42e30e9 100755
--- a/interop/block-status-payload.sh
+++ b/interop/block-status-payload.sh
@@ -49,6 +49,7 @@  args = ["qemu-nbd", "-f", "qcow2", "-A", "-B", "bitmap0", "-B", "bitmap1",
 h.connect_systemd_socket_activation(args)
 assert h.aio_is_negotiating() is True
 assert h.get_extended_headers_negotiated() is False
+
 # Flag not available until info or go
 try:
   h.can_block_status_payload()
@@ -58,7 +59,18 @@  except nbd.Error:
 h.opt_info()
 assert h.can_block_status_payload() is False
 assert h.can_meta_context("base:allocation") is True
-h.opt_abort()
+
+# Filter request not allowed if not advertised
+def f():
+  assert False
+h.opt_go()
+assert h.can_block_status_payload() is False
+try:
+  h.block_status_filter(0, 512, ["base:allocation"], f)
+  assert False
+except nbd.Error:
+  pass
+h.shutdown()
 '

 # Conditional part of test: if qemu is new enough to support extended
diff --git a/info/info-can.sh b/info/info-can.sh
index e5f6a44b..111b0be2 100755
--- a/info/info-can.sh
+++ b/info/info-can.sh
@@ -38,6 +38,9 @@  requires bash -c "nbdkit sh --dump-plugin | grep has_can_cache=1"
 # and oldstyle never, but that feels like depending a bit too much on
 # the implementation.

+# --can block-status-payload is not supported by nbdkit yet. Testing
+# is done during interop with new-enough qemu.
+
 # --can structured-reply is not a per-export setting, but rather
 # something set on the server as a whole.