@@ -38,6 +38,7 @@ GlobalProperty hw_compat_9_0[] = {
{"arm-cpu", "backcompat-cntfrq", "true" },
{"scsi-disk-base", "migrate-emulated-scsi-request", "false" },
{"vfio-pci", "skip-vsc-check", "false" },
+ { "migration", "x-switchover-abort", "off" },
};
const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0);
@@ -16,6 +16,7 @@ bool migrate_background_snapshot(void);
bool migrate_dirty_limit(void);
bool migrate_postcopy_ram(void);
bool migrate_switchover_ack(void);
+bool migrate_switchover_abort(void);
/* parameters */
@@ -312,6 +312,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: '%s'\n",
MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ),
params->tls_authz);
+ assert(params->has_switchover_limit);
+ monitor_printf(mon, "%s: %" PRIu64 " ms\n",
+ MigrationParameter_str(MIGRATION_PARAMETER_SWITCHOVER_LIMIT),
+ params->switchover_limit);
+
if (params->has_block_bitmap_mapping) {
const BitmapMigrationNodeAliasList *bmnal;
@@ -624,6 +629,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_mode = true;
visit_type_MigMode(v, param, &p->mode, &err);
break;
+ case MIGRATION_PARAMETER_SWITCHOVER_LIMIT:
+ p->has_switchover_limit = true;
+ visit_type_size(v, param, &p->switchover_limit, &err);
+ break;
+
default:
assert(0);
}
@@ -113,6 +113,7 @@ static void migration_downtime_start(MigrationState *s)
{
trace_vmstate_downtime_checkpoint("src-downtime-start");
s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ s->downtime_now = s->downtime_start;
}
static void migration_downtime_end(MigrationState *s)
@@ -204,6 +205,10 @@ static int migration_stop_vm(MigrationState *s, RunState state)
trace_vmstate_downtime_checkpoint("src-vm-stopped");
trace_migration_completion_vm_stop(ret);
+ if (migration_downtime_exceeded()) {
+ migration_set_downtime_exceeded_error(s, s->to_dst_file);
+ ret = -1;
+ }
return ret;
}
@@ -1652,6 +1657,7 @@ int migrate_init(MigrationState *s, Error **errp)
s->mbps = 0.0;
s->pages_per_second = 0.0;
s->downtime = 0;
+ s->downtime_now = 0;
s->expected_downtime = 0;
s->setup_time = 0;
s->start_postcopy = false;
@@ -2758,6 +2764,39 @@ static void migration_completion_failed(MigrationState *s,
MIGRATION_STATUS_FAILED);
}
+/*
+ * Sample QEMU_CLOCK_REALTIME (ms), record it in s->downtime_now and
+ * return the time elapsed since s->downtime_start.
+ */
+int64_t migration_get_current_downtime(MigrationState *s)
+{
+    int64_t now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    s->downtime_now = now_ms;
+
+    return now_ms - s->downtime_start;
+}
+
+/*
+ * migration_downtime_exceeded: check whether the source downtime budget
+ * has been used up.
+ *
+ * Always false unless the switchover-abort capability is enabled.
+ * Otherwise the current downtime is sampled (which also refreshes
+ * s->downtime_now) and compared with downtime_limit + switchover_limit.
+ *
+ * Returns: true if the elapsed downtime has reached that budget.
+ */
+bool migration_downtime_exceeded(void)
+{
+    MigrationState *s = migrate_get_current();
+    int64_t elapsed;
+
+    if (!migrate_switchover_abort()) {
+        return false;
+    }
+
+    elapsed = migration_get_current_downtime(s);
+    if (elapsed < 0) {
+        /*
+         * Keep a negative reading out of the unsigned comparison below,
+         * where it would wrap to a huge value and look like an overrun.
+         */
+        return false;
+    }
+
+    return (uint64_t)elapsed >= s->parameters.downtime_limit +
+                                s->parameters.switchover_limit;
+}
+
+/*
+ * Cancel the migration with an error stating by how many milliseconds
+ * the recorded downtime (s->downtime_now - s->downtime_start) is over
+ * parameters.downtime_limit.  s->downtime_now is used as stored, it is
+ * not re-sampled here.  @f is not used.
+ *
+ * Returns: -EFAULT, unconditionally.
+ */
+int migration_set_downtime_exceeded_error(MigrationState *s, QEMUFile *f)
+{
+    Error *err = NULL;
+    int64_t limit = s->parameters.downtime_limit;
+    int64_t elapsed = s->downtime_now - s->downtime_start;
+
+    error_setg(&err, "Downtime Limit of %" PRIi64" ms exceeded by %"PRIi64" ms",
+               limit, elapsed - limit);
+
+    /* migration_cancel() takes a const Error *; release our copy after */
+    migration_cancel(err);
+    error_free(err);
+
+    return -EFAULT;
+}
+
/**
* migration_completion: Used by migration_thread when there's not much left.
* The caller 'breaks' the loop when this returns.
@@ -338,6 +338,8 @@ struct MigrationState {
/* Timestamp when VM is down (ms) to migrate the last stuff */
int64_t downtime_start;
int64_t downtime;
+ /* Current measured downtime on source */
+ int64_t downtime_now;
int64_t expected_downtime;
bool capabilities[MIGRATION_CAPABILITY__MAX];
int64_t setup_time;
@@ -519,6 +521,9 @@ void migration_consume_urgent_request(void);
bool migration_rate_limit(void);
void migration_bh_schedule(QEMUBHFunc *cb, void *opaque);
void migration_cancel(const Error *error);
+int64_t migration_get_current_downtime(MigrationState *s);
+int migration_set_downtime_exceeded_error(MigrationState *s, QEMUFile *f);
+bool migration_downtime_exceeded(void);
void migration_populate_vfio_info(MigrationInfo *info);
void migration_reset_vfio_bytes_transferred(void);
@@ -40,6 +40,13 @@
* for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300
+/*
+ * Time in milliseconds that downtime can exceed downtime limit
+ * on source or destination before migration aborts if capability
+ * switchover_abort is enabled
+ */
+#define DEFAULT_MIGRATE_SET_SWITCHOVER_LIMIT 0
+
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
@@ -162,6 +169,9 @@ Property migration_properties[] = {
DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState,
parameters.zero_page_detection,
ZERO_PAGE_DETECTION_MULTIFD),
+ DEFINE_PROP_UINT64("x-switchover-limit", MigrationState,
+ parameters.switchover_limit,
+ DEFAULT_MIGRATE_SET_SWITCHOVER_LIMIT),
/* Migration capabilities */
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@@ -184,6 +194,8 @@ Property migration_properties[] = {
#endif
DEFINE_PROP_MIG_CAP("x-switchover-ack",
MIGRATION_CAPABILITY_SWITCHOVER_ACK),
+ DEFINE_PROP_MIG_CAP("x-switchover-abort",
+ MIGRATION_CAPABILITY_SWITCHOVER_ABORT),
DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
DEFINE_PROP_MIG_CAP("mapped-ram", MIGRATION_CAPABILITY_MAPPED_RAM),
DEFINE_PROP_END_OF_LIST(),
@@ -315,6 +327,13 @@ bool migrate_switchover_ack(void)
return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK];
}
+/* Report whether the switchover-abort capability is set for the migration. */
+bool migrate_switchover_abort(void)
+{
+    MigrationState *ms = migrate_get_current();
+    bool enabled = ms->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ABORT];
+
+    return enabled;
+}
+
bool migrate_validate_uuid(void)
{
MigrationState *s = migrate_get_current();
@@ -592,6 +611,14 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
}
}
+ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ABORT]) {
+ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) {
+ error_setg(errp, "Capability 'switchover-abort' requires capability "
+ "'return-path'");
+ return false;
+ }
+ }
+
return true;
}
@@ -824,6 +851,13 @@ ZeroPageDetection migrate_zero_page_detection(void)
return s->parameters.zero_page_detection;
}
+/*
+ * Store @value as the switchover_limit parameter of the current
+ * migration state.  No range check is done here.
+ */
+void migrate_set_switchover_limit(uint64_t value)
+{
+    migrate_get_current()->parameters.switchover_limit = value;
+}
+
/* parameters helpers */
AnnounceParameters *migrate_announce_params(void)
@@ -905,6 +939,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->mode = s->parameters.mode;
params->has_zero_page_detection = true;
params->zero_page_detection = s->parameters.zero_page_detection;
+ params->has_switchover_limit = true;
+ params->switchover_limit = s->parameters.switchover_limit;
return params;
}
@@ -937,6 +973,7 @@ void migrate_params_init(MigrationParameters *params)
params->has_vcpu_dirty_limit = true;
params->has_mode = true;
params->has_zero_page_detection = true;
+ params->has_switchover_limit = true;
}
/*
@@ -1110,6 +1147,15 @@ bool migrate_params_check(MigrationParameters *params, Error **errp)
return false;
}
+ if (params->has_switchover_limit &&
+ (params->switchover_limit > MAX_MIGRATE_DOWNTIME)) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+ "switchover_limit",
+ "an integer in the range of 0 to "
+ stringify(MAX_MIGRATE_DOWNTIME)" ms");
+ return false;
+ }
+
return true;
}
@@ -1216,6 +1262,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_zero_page_detection) {
dest->zero_page_detection = params->zero_page_detection;
}
+
+ if (params->has_switchover_limit) {
+ dest->switchover_limit = params->switchover_limit;
+ }
+
}
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1341,6 +1392,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
if (params->has_zero_page_detection) {
s->parameters.zero_page_detection = params->zero_page_detection;
}
+
+ if (params->has_switchover_limit) {
+ s->parameters.switchover_limit = params->switchover_limit;
+ }
+
}
void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -84,6 +84,7 @@ const char *migrate_tls_creds(void);
const char *migrate_tls_hostname(void);
uint64_t migrate_xbzrle_cache_size(void);
ZeroPageDetection migrate_zero_page_detection(void);
+void migrate_set_switchover_limit(uint64_t value);
/* parameters helpers */
@@ -1492,6 +1492,7 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
int64_t start_ts_each, end_ts_each;
SaveStateEntry *se;
+ MigrationState *s = migrate_get_current();
int ret;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1523,6 +1524,11 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
end_ts_each - start_ts_each);
+ if (migration_downtime_exceeded()) {
+ if (migration_set_downtime_exceeded_error(s, f)) {
+ return -1;
+ }
+ }
}
trace_vmstate_downtime_checkpoint("src-iterable-saved");
@@ -1561,6 +1567,13 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
end_ts_each - start_ts_each);
+
+ if (migration_downtime_exceeded()) {
+ if (migration_set_downtime_exceeded_error(ms, f)) {
+ return -1;
+ }
+ }
+
}
if (inactivate_disks) {
@@ -469,6 +469,10 @@
# each RAM page. Requires a migration URI that supports seeking,
# such as a file. (since 9.0)
#
+# @switchover-abort: abort migration if the downtime exceeds the
+# configured downtime limit plus the value of the switchover-limit
+# migration parameter. (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-colo and @x-ignore-shared are experimental.
@@ -485,7 +489,7 @@
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
'validate-uuid', 'background-snapshot',
'zero-copy-send', 'postcopy-preempt', 'switchover-ack',
- 'dirty-limit', 'mapped-ram'] }
+ 'dirty-limit', 'mapped-ram', 'switchover-abort'] }
##
# @MigrationCapabilityStatus:
@@ -821,6 +825,10 @@
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
+# @switchover-limit: Switchover limit (ms) that would be used to
+# initiate abort of live migration if the total switchover time
+# exceeds downtime_limit + switchover_limit (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -845,7 +853,8 @@
{ 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
'vcpu-dirty-limit',
'mode',
- 'zero-page-detection'] }
+ 'zero-page-detection',
+ 'switchover-limit'] }
##
# @MigrateSetParameters:
@@ -991,6 +1000,10 @@
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
+# @switchover-limit: Switchover limit (ms) that would be used to
+# initiate abort of live migration if the total switchover time
+# exceeds downtime_limit + switchover_limit (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -1030,7 +1043,8 @@
'features': [ 'unstable' ] },
'*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode',
- '*zero-page-detection': 'ZeroPageDetection'} }
+ '*zero-page-detection': 'ZeroPageDetection',
+ '*switchover-limit': 'uint64'} }
##
# @migrate-set-parameters:
@@ -1190,6 +1204,10 @@
# See description in @ZeroPageDetection. Default is 'multifd'.
# (since 9.0)
#
+# @switchover-limit: Switchover limit (ms) that would be used to
+# initiate abort of live migration if the total switchover time
+# exceeds downtime_limit + switchover_limit (Since 9.1)
+#
# Features:
#
# @unstable: Members @x-checkpoint-delay and
@@ -1226,7 +1244,8 @@
'features': [ 'unstable' ] },
'*vcpu-dirty-limit': 'uint64',
'*mode': 'MigMode',
- '*zero-page-detection': 'ZeroPageDetection'} }
+ '*zero-page-detection': 'ZeroPageDetection',
+ '*switchover-limit': 'uint64'} }
##
# @query-migrate-parameters:
Introduce capability switchover_abort and migration parameter switchover_limit to allow for live migration abort when the source downtime exceeded by switchover_limit. Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> --- hw/core/machine.c | 1 + include/migration/client-options.h | 1 + migration/migration-hmp-cmds.c | 10 ++++++ migration/migration.c | 39 +++++++++++++++++++++ migration/migration.h | 5 +++ migration/options.c | 56 ++++++++++++++++++++++++++++++ migration/options.h | 1 + migration/savevm.c | 13 +++++++ qapi/migration.json | 27 +++++++++++--- 9 files changed, 149 insertions(+), 4 deletions(-)