@@ -106,4 +106,8 @@ bool migration_incoming_postcopy_advised(void);
/* True if background snapshot is active */
bool migration_in_bg_snapshot(void);
+/* Wrapper for block active/inactive operations */
+bool migration_block_activate(Error **errp);
+bool migration_block_inactivate(void);
+
#endif
new file mode 100644
@@ -0,0 +1,94 @@
+/*
+ * Block activation tracking for migration purpose
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2024 Red Hat, Inc.
+ */
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "qapi/error.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+/*
+ * Migration-only cache to remember the block layer activation status.
+ * Protected by BQL.
+ *
+ * We need this because..
+ *
+ * - Migration can fail after block devices are invalidated (during
+ * switchover phase). When that happens, we need to be able to recover
+ * the block drive status by re-activating them.
+ *
+ * - Currently bdrv_inactivate_all() is not safe to be invoked on top of
+ * invalidated drives (even if bdrv_activate_all() is actually safe to be
+ * called any time!). It means remembering this could help migration to
+ * make sure it won't invalidate twice in a row, crashing QEMU. It can
+ * happen when we migrate a PAUSED VM from host1 to host2, then migrate
+ * again to host3 without starting it. TODO: a cleaner solution is to
+ * allow safe invoke of bdrv_inactivate_all() at anytime, like
+ * bdrv_activate_all().
+ *
+ * For freshly started QEMU, the flag is initialized to TRUE reflecting the
+ * scenario where QEMU owns block device ownerships.
+ *
+ * For incoming QEMU taking a migration stream, the flag is initialized to
+ * FALSE reflecting that the incoming side doesn't own the block devices,
+ * not until switchover happens.
+ */
+static bool migration_block_active;
+
+/* Setup the disk activation status */
+void migration_block_active_setup(bool active)
+{
+ migration_block_active = active;
+}
+
+bool migration_block_activate(Error **errp)
+{
+ ERRP_GUARD();
+
+ assert(bql_locked());
+
+ if (migration_block_active) {
+ trace_migration_block_activation("active-skipped");
+ return true;
+ }
+
+ trace_migration_block_activation("active");
+
+ bdrv_activate_all(errp);
+ if (*errp) {
+ error_report_err(error_copy(*errp));
+ return false;
+ }
+
+ migration_block_active = true;
+ return true;
+}
+
+bool migration_block_inactivate(void)
+{
+ int ret;
+
+ assert(bql_locked());
+
+ if (!migration_block_active) {
+ trace_migration_block_activation("inactive-skipped");
+ return true;
+ }
+
+ trace_migration_block_activation("inactive");
+
+ ret = bdrv_inactivate_all();
+ if (ret) {
+ error_report("%s: bdrv_inactivate_all() failed: %d",
+ __func__, ret);
+ return false;
+ }
+
+ migration_block_active = false;
+ return true;
+}
@@ -836,7 +836,7 @@ static void *colo_process_incoming_thread(void *opaque)
/* Make sure all file formats throw away their mutable metadata */
bql_lock();
- bdrv_activate_all(&local_err);
+ migration_block_activate(&local_err);
bql_unlock();
if (local_err) {
error_report_err(local_err);
@@ -11,6 +11,7 @@ migration_files = files(
system_ss.add(files(
'block-dirty-bitmap.c',
+ 'block-active.c',
'channel.c',
'channel-block.c',
'cpu-throttle.c',
@@ -738,7 +738,6 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
static void process_incoming_migration_bh(void *opaque)
{
- Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
@@ -769,11 +768,7 @@ static void process_incoming_migration_bh(void *opaque)
* Make sure all file formats throw away their mutable
* metadata. If error, don't restart the VM yet.
*/
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- local_err = NULL;
- } else {
+ if (migration_block_activate(NULL)) {
vm_start();
}
} else {
@@ -1552,16 +1547,6 @@ static void migrate_fd_cancel(MigrationState *s)
}
}
}
- if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
- Error *local_err = NULL;
-
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- } else {
- s->block_inactive = false;
- }
- }
}
void migration_add_notifier_mode(NotifierWithReturn *notify,
@@ -1860,6 +1845,12 @@ void qmp_migrate_incoming(const char *uri, bool has_channels,
return;
}
+ /*
+ * Newly setup incoming QEMU. Mark the block active state to reflect
+ * that the src currently owns the disks.
+ */
+ migration_block_active_setup(false);
+
once = false;
}
@@ -2512,7 +2503,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
QIOChannelBuffer *bioc;
QEMUFile *fb;
uint64_t bandwidth = migrate_max_postcopy_bandwidth();
- bool restart_block = false;
int cur_state = MIGRATION_STATUS_ACTIVE;
if (migrate_postcopy_preempt()) {
@@ -2548,13 +2538,10 @@ static int postcopy_start(MigrationState *ms, Error **errp)
goto fail;
}
- ret = bdrv_inactivate_all();
- if (ret < 0) {
- error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()",
- __func__);
+ if (!migration_block_inactivate()) {
+ error_setg(errp, "%s: Failed in bdrv_inactivate_all()", __func__);
goto fail;
}
- restart_block = true;
/*
* Cause any non-postcopiable, but iterative devices to
@@ -2624,8 +2611,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
goto fail_closefb;
}
- restart_block = false;
-
/* Now send that blob */
if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
error_setg(errp, "%s: Failed to send packaged data", __func__);
@@ -2670,17 +2655,7 @@ fail_closefb:
fail:
migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
MIGRATION_STATUS_FAILED);
- if (restart_block) {
- /* A failure happened early enough that we know the destination hasn't
- * accessed block devices, so we're safe to recover.
- */
- Error *local_err = NULL;
-
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- }
- }
+ migration_block_activate(NULL);
migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
bql_unlock();
return -1;
@@ -2778,31 +2753,6 @@ static void migration_completion_postcopy(MigrationState *s)
trace_migration_completion_postcopy_end_after_complete();
}
-static void migration_completion_failed(MigrationState *s,
- int current_active_state)
-{
- if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
- s->state == MIGRATION_STATUS_DEVICE)) {
- /*
- * If not doing postcopy, vm_start() will be called: let's
- * regain control on images.
- */
- Error *local_err = NULL;
-
- bql_lock();
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- } else {
- s->block_inactive = false;
- }
- bql_unlock();
- }
-
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_FAILED);
-}
-
/**
* migration_completion: Used by migration_thread when there's not much left.
* The caller 'breaks' the loop when this returns.
@@ -2856,7 +2806,8 @@ fail:
error_free(local_err);
}
- migration_completion_failed(s, current_active_state);
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_FAILED);
}
/**
@@ -3286,6 +3237,11 @@ static void migration_iteration_finish(MigrationState *s)
case MIGRATION_STATUS_FAILED:
case MIGRATION_STATUS_CANCELLED:
case MIGRATION_STATUS_CANCELLING:
+ /*
+ * Re-activate the block drives if they're inactivated. Note, COLO
+ * shouldn't use block_active at all, so it should be no-op there.
+ */
+ migration_block_activate(NULL);
if (runstate_is_live(s->vm_old_state)) {
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
vm_start();
@@ -3858,6 +3814,8 @@ static void migration_instance_init(Object *obj)
ms->state = MIGRATION_STATUS_NONE;
ms->mbps = -1;
ms->pages_per_second = -1;
+ /* Freshly started QEMU owns all the block devices */
+ migration_block_active_setup(true);
qemu_sem_init(&ms->pause_sem, 0);
qemu_mutex_init(&ms->error_mutex);
@@ -370,9 +370,6 @@ struct MigrationState {
/* Flag set once the migration thread is running (and needs joining) */
bool migration_thread_running;
- /* Flag set once the migration thread called bdrv_inactivate_all */
- bool block_inactive;
-
/* Migration is waiting for guest to unplug device */
QemuSemaphore wait_unplug_sem;
@@ -556,4 +553,7 @@ void migration_bitmap_sync_precopy(bool last_stage);
/* migration/block-dirty-bitmap.c */
void dirty_bitmap_mig_init(void);
+/* migration/block-active.c */
+void migration_block_active_setup(bool active);
+
#endif
@@ -1547,19 +1547,18 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
}
if (inactivate_disks) {
- /* Inactivate before sending QEMU_VM_EOF so that the
- * bdrv_activate_all() on the other end won't fail. */
- ret = bdrv_inactivate_all();
- if (ret) {
- error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
- __func__, ret);
+ /*
+ * Inactivate before sending QEMU_VM_EOF so that the
+ * bdrv_activate_all() on the other end won't fail.
+ */
+ if (!migration_block_inactivate()) {
+ error_setg(&local_err, "%s: bdrv_inactivate_all() failed",
+ __func__);
migrate_set_error(ms, local_err);
error_report_err(local_err);
- qemu_file_set_error(f, ret);
+ qemu_file_set_error(f, -EFAULT);
return ret;
}
- /* Remember that we did this */
- s->block_inactive = true;
}
if (!in_postcopy) {
/* Postcopy stream will still be going */
@@ -2123,7 +2122,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
static void loadvm_postcopy_handle_run_bh(void *opaque)
{
- Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
@@ -2146,12 +2144,11 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet.
*/
- bdrv_activate_all(&local_err);
+ bool success = migration_block_activate(NULL);
+
trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
- if (local_err) {
- error_report_err(local_err);
- local_err = NULL;
- } else {
+
+ if (success) {
vm_start();
}
} else {
@@ -3193,11 +3190,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
* side of the migration take control of the images.
*/
if (live && !saved_vm_running) {
- ret = bdrv_inactivate_all();
- if (ret) {
- error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
- __func__, ret);
- }
+ migration_block_inactivate();
}
}
@@ -383,3 +383,6 @@ migration_pagecache_insert(void) "Error allocating page"
# cpu-throttle.c
cpu_throttle_set(int new_throttle_pct) "set guest CPU throttled by %d%%"
cpu_throttle_dirty_sync(void) ""
+
+# block-active.c
+migration_block_activation(const char *name) "%s"
@@ -31,6 +31,7 @@
#include "qapi/type-helpers.h"
#include "hw/mem/memory-device.h"
#include "hw/intc/intc.h"
+#include "migration/misc.h"
NameInfo *qmp_query_name(Error **errp)
{
@@ -103,13 +104,8 @@ void qmp_cont(Error **errp)
* Continuing after completed migration. Images have been
* inactivated to allow the destination to take control. Need to
* get control back now.
- *
- * If there are no inactive block nodes (e.g. because the VM was
- * just paused rather than completing a migration),
- * bdrv_inactivate_all() simply doesn't do anything.
*/
- bdrv_activate_all(&local_err);
- if (local_err) {
+ if (!migration_block_activate(&local_err)) {
error_propagate(errp, local_err);
return;
}