diff mbox series

[v4,33/34] migration: Add support for fdset with multifd + file

Message ID 20240220224138.24759-34-farosas@suse.de (mailing list archive)
State New, archived
Headers show
Series migration: File based migration with multifd and fixed-ram | expand

Commit Message

Fabiano Rosas Feb. 20, 2024, 10:41 p.m. UTC
Allow multifd to use an fdset when migrating to a file. This is useful
for the scenario where the management layer wants to have control over
the migration file.

By receiving the file descriptors directly, QEMU can delegate some
high level operating system operations to the management layer (such
as mandatory access control). The management layer might also want to
add its own headers before the migration stream.

Enable the "file:/dev/fdset/#" syntax for the multifd migration with
fixed-ram. The requirements for the fdset mechanism are:

On the migration source side:

- the fdset must contain two fds that are not duplicates between
  themselves;
- if direct-io is to be used, exactly one of the fds must have the
  O_DIRECT flag set;
- the file must be opened with WRONLY both times.

On the migration destination side:

- the fdset must contain one fd;
- the file must be opened with RDONLY.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 docs/devel/migration/main.rst | 18 +++++++
 migration/file.c              | 97 +++++++++++++++++++++++++++++++++--
 2 files changed, 111 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst
index 8024275d6d..ac6b6a8eb0 100644
--- a/docs/devel/migration/main.rst
+++ b/docs/devel/migration/main.rst
@@ -46,6 +46,24 @@  over any transport.
   application to add its own metadata to the start of the file without
   QEMU interference.
 
+  The file migration also supports using a file that has already been
+  opened. A set of file descriptors is passed to QEMU via an "fdset"
+  (see add-fd QMP command documentation). This method allows a
+  management application to have control over the migration file
+  opening operation. There are, however, strict requirements to this
+  interface:
+
+  On the migration source side:
+    - the fdset must contain two file descriptors that are not
+      duplicates between themselves;
+    - if the direct-io capability is to be used, exactly one of the
+      file descriptors must have the O_DIRECT flag set;
+    - the file must be opened with WRONLY both times.
+
+  On the migration destination side:
+    - the fdset must contain one file descriptor;
+    - the file must be opened with RDONLY.
+
 In addition, support is included for migration using RDMA, which
 transports the page data using ``RDMA``, where the hardware takes care of
 transporting the pages, and the load on the CPU is much lower.  While the
diff --git a/migration/file.c b/migration/file.c
index f1c7615fb6..95f3210faf 100644
--- a/migration/file.c
+++ b/migration/file.c
@@ -10,12 +10,14 @@ 
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "qapi/qapi-commands-misc.h"
 #include "channel.h"
 #include "fd.h"
 #include "file.h"
 #include "migration.h"
 #include "io/channel-file.h"
 #include "io/channel-util.h"
+#include "monitor/monitor.h"
 #include "options.h"
 #include "trace.h"
 
@@ -23,6 +25,7 @@ 
 
 static struct FileOutgoingArgs {
     char *fname;
+    int64_t fdset_id;
 } outgoing_args;
 
 /* Remove the offset option from @filespec and return it in @offsetp. */
@@ -44,6 +47,84 @@  int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp)
     return 0;
 }
 
+/*
+ * If the open flags and file status flags from the file descriptors
+ * in the fdset don't match what QEMU expects, errno gets set to
+ * EACCES. Let's provide a more user-friendly message.
+ */
+static void file_fdset_error(int flags, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (errno == EACCES) {
+        /* ditch the previous error */
+        error_free(*errp);
+        *errp = NULL;
+
+        error_setg(errp, "Fdset is missing a file descriptor with flags: 0x%x",
+                   flags);
+    }
+}
+
+static void file_remove_fdset(void)
+{
+    if (outgoing_args.fdset_id != -1) {
+        qmp_remove_fd(outgoing_args.fdset_id, false, -1, NULL);
+        outgoing_args.fdset_id = -1;
+    }
+}
+
+/*
+ * Due to the behavior of the dup() system call, we need the fdset to
+ * have two non-duplicate fds so we can enable direct IO in the
+ * secondary channels without affecting the main channel.
+ */
+static bool file_parse_fdset(const char *filename, int64_t *fdset_id,
+                             Error **errp)
+{
+    FdsetInfoList *fds_info;
+    FdsetFdInfoList *fd_info;
+    const char *fdset_id_str;
+    int nfds = 0;
+
+    *fdset_id = -1;
+
+    if (!strstart(filename, "/dev/fdset/", &fdset_id_str)) {
+        return true;
+    }
+
+    if (!migrate_multifd()) {
+        error_setg(errp, "fdset is only supported with multifd");
+        return false;
+    }
+
+    *fdset_id = qemu_parse_fd(fdset_id_str);
+
+    for (fds_info = qmp_query_fdsets(NULL); fds_info;
+         fds_info = fds_info->next) {
+
+        if (*fdset_id != fds_info->value->fdset_id) {
+            continue;
+        }
+
+        for (fd_info = fds_info->value->fds; fd_info; fd_info = fd_info->next) {
+            if (nfds++ > 2) {
+                break;
+            }
+        }
+    }
+
+    if (nfds != 2) {
+        error_setg(errp, "Outgoing migration needs two fds in the fdset, "
+                   "got %d", nfds);
+        qmp_remove_fd(*fdset_id, false, -1, NULL);
+        *fdset_id = -1;
+        return false;
+    }
+
+    return true;
+}
+
 int file_send_channel_destroy(QIOChannel *ioc)
 {
     if (ioc) {
@@ -52,6 +133,7 @@  int file_send_channel_destroy(QIOChannel *ioc)
     g_free(outgoing_args.fname);
     outgoing_args.fname = NULL;
 
+    file_remove_fdset();
     return 0;
 }
 
@@ -82,6 +164,7 @@  bool file_send_channel_create(gpointer opaque, Error **errp)
     } else {
         ioc = qio_channel_file_new_path(outgoing_args.fname, flags, 0, errp);
         if (!ioc) {
+            file_fdset_error(flags, errp);
             return false;
         }
     }
@@ -105,13 +188,18 @@  void file_start_outgoing_migration(MigrationState *s,
 
     trace_migration_file_outgoing(filename);
 
-    fioc = qio_channel_file_new_path(filename, flags, mode, errp);
-    if (!fioc) {
+    if (!file_parse_fdset(filename, &outgoing_args.fdset_id, errp)) {
         return;
     }
 
     outgoing_args.fname = g_strdup(filename);
 
+    fioc = qio_channel_file_new_path(filename, flags, mode, errp);
+    if (!fioc) {
+        file_fdset_error(flags, errp);
+        return;
+    }
+
     ioc = QIO_CHANNEL(fioc);
     if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) {
         return;
@@ -135,12 +223,13 @@  void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp)
     QIOChannelFile *fioc = NULL;
     uint64_t offset = file_args->offset;
     int channels = 1;
-    int i = 0, fd;
+    int i = 0, fd, flags = O_RDONLY;
 
     trace_migration_file_incoming(filename);
 
-    fioc = qio_channel_file_new_path(filename, O_RDONLY, 0, errp);
+    fioc = qio_channel_file_new_path(filename, flags, 0, errp);
     if (!fioc) {
+        file_fdset_error(flags, errp);
         return;
     }