@@ -43,6 +43,13 @@ struct RAMBlock {
size_t page_size;
/* dirty bitmap used during migration */
unsigned long *bmap;
+ /* shadow dirty bitmap used when migrating to a file */
+ unsigned long *shadow_bmap;
+ /* offset in the file where the pages belonging to this ramblock are
+ * saved; used only during migration to a file
+ */
+ off_t bitmap_offset;
+ uint64_t pages_offset;
/* bitmap of already received pages in postcopy */
unsigned long *receivedmap;
@@ -165,7 +165,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
MIGRATION_CAPABILITY_XBZRLE,
MIGRATION_CAPABILITY_X_COLO,
MIGRATION_CAPABILITY_VALIDATE_UUID,
- MIGRATION_CAPABILITY_ZERO_COPY_SEND);
+ MIGRATION_CAPABILITY_ZERO_COPY_SEND,
+ MIGRATION_CAPABILITY_FIXED_RAM);
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
@@ -1325,6 +1326,27 @@ static bool migrate_caps_check(bool *cap_list,
}
#endif
+ if (cap_list[MIGRATION_CAPABILITY_FIXED_RAM]) {
+ if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
+ error_setg(errp, "Directly mapped memory incompatible with multifd");
+ return false;
+ }
+
+ if (cap_list[MIGRATION_CAPABILITY_XBZRLE]) {
+ error_setg(errp, "Directly mapped memory incompatible with xbzrle");
+ return false;
+ }
+
+ if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
+ error_setg(errp, "Directly mapped memory incompatible with compression");
+ return false;
+ }
+
+ if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
+ error_setg(errp, "Directly mapped memory incompatible with postcopy ram");
+ return false;
+ }
+ }
/* incoming side only */
if (runstate_check(RUN_STATE_INMIGRATE) &&
@@ -2629,6 +2651,11 @@ MultiFDCompression migrate_multifd_compression(void)
return s->parameters.multifd_compression;
}
+int migrate_fixed_ram(void)
+{
+ return migrate_get_current()->enabled_capabilities[MIGRATION_CAPABILITY_FIXED_RAM];
+}
+
int migrate_multifd_zlib_level(void)
{
MigrationState *s;
@@ -4189,6 +4216,21 @@ static void *bg_migration_thread(void *opaque)
return NULL;
}
+static int
+migrate_check_fixed_ram(MigrationState *s, Error **errp)
+{
+ if (!s->enabled_capabilities[MIGRATION_CAPABILITY_FIXED_RAM])
+ return 0;
+
+ if (!qemu_file_is_seekable(s->to_dst_file)) {
+ error_setg(errp, "Directly mapped memory requires a seekable transport");
+ return -1;
+ }
+
+ return 0;
+}
+
+
void migrate_fd_connect(MigrationState *s, Error *error_in)
{
Error *local_err = NULL;
@@ -4264,6 +4306,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
return;
}
+ if (migrate_check_fixed_ram(s, &local_err) < 0) {
+ migrate_fd_cleanup(s);
+ migrate_fd_error(s, local_err);
+ return;
+ }
+
if (resume) {
/* Wakeup the main migration thread to do the recovery */
migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4397,6 +4445,7 @@ static Property migration_properties[] = {
DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
/* Migration capabilities */
+ DEFINE_PROP_MIG_CAP("x-fixed-ram", MIGRATION_CAPABILITY_FIXED_RAM),
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
@@ -410,6 +410,7 @@ bool migrate_zero_blocks(void);
bool migrate_dirty_bitmaps(void);
bool migrate_ignore_shared(void);
bool migrate_validate_uuid(void);
+int migrate_fixed_ram(void);
bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
@@ -1261,10 +1261,15 @@ static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
int len = 0;
if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
- len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
- qemu_put_byte(file, 0);
- len += 1;
- ram_release_page(block->idstr, offset);
+ if (migrate_fixed_ram()) {
+ /* for zero pages we don't need to do anything */
+ len = 1;
+ } else {
+ len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
+ qemu_put_byte(file, 0);
+ len += 1;
+ }
+ ram_release_page(block->idstr, offset);
}
return len;
}
@@ -1342,15 +1347,22 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
uint8_t *buf, bool async)
{
- ram_transferred_add(save_page_header(rs, rs->f, block,
- offset | RAM_SAVE_FLAG_PAGE));
- if (async) {
- qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
- migrate_release_ram() &&
- migration_in_postcopy());
- } else {
- qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
- }
+
+ if (migrate_fixed_ram()) {
+ qemu_put_buffer_at(rs->f, buf, TARGET_PAGE_SIZE,
+ block->pages_offset + offset);
+ set_bit(offset >> TARGET_PAGE_BITS, block->shadow_bmap);
+ } else {
+ ram_transferred_add(save_page_header(rs, rs->f, block,
+ offset | RAM_SAVE_FLAG_PAGE));
+ if (async) {
+ qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
+ migrate_release_ram() &&
+ migration_in_postcopy());
+ } else {
+ qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
+ }
+ }
ram_transferred_add(TARGET_PAGE_SIZE);
ram_counters.normal++;
return 1;
@@ -2683,6 +2695,8 @@ static void ram_save_cleanup(void *opaque)
block->clear_bmap = NULL;
g_free(block->bmap);
block->bmap = NULL;
+ g_free(block->shadow_bmap);
+ block->shadow_bmap = NULL;
}
xbzrle_cleanup();
@@ -3044,6 +3058,8 @@ static void ram_list_init_bitmaps(void)
*/
block->bmap = bitmap_new(pages);
bitmap_set(block->bmap, 0, pages);
+ block->shadow_bmap = bitmap_new(block->used_length >> TARGET_PAGE_BITS);
+ bitmap_set(block->shadow_bmap, 0, block->used_length >> TARGET_PAGE_BITS);
block->clear_bmap_shift = shift;
block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
}
@@ -3226,12 +3242,34 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
qemu_put_be64(f, block->used_length);
if (migrate_postcopy_ram() && block->page_size !=
- qemu_host_page_size) {
+ qemu_host_page_size) {
qemu_put_be64(f, block->page_size);
}
if (migrate_ignore_shared()) {
qemu_put_be64(f, block->mr->addr);
}
+
+ if (migrate_fixed_ram()) {
+ long num_pages = block->used_length >> TARGET_PAGE_BITS;
+ long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+
+
+ /* Needed for external programs (think analyze-migration.py) */
+ qemu_put_be32(f, bitmap_size);
+
+ /*
+ * Make the pages offset aligned to TARGET_PAGE_SIZE to enable
+ * DIO in the future. Also add 8 to account for the 8-byte
+ * pages_offset value written just below.
+ */
+ block->bitmap_offset = qemu_get_offset(f) + 8;
+ block->pages_offset = ROUND_UP(block->bitmap_offset +
+ bitmap_size, TARGET_PAGE_SIZE);
+ qemu_put_be64(f, block->pages_offset);
+
+ /* Now prepare offset for next ramblock */
+ qemu_set_offset(f, block->pages_offset + block->used_length, SEEK_SET);
+ }
}
}
@@ -3249,6 +3287,17 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
return 0;
}
+static void ram_save_shadow_bmap(QEMUFile *f)
+{
+ RAMBlock *block;
+
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ long num_pages = block->used_length >> TARGET_PAGE_BITS;
+ long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
+ qemu_put_buffer_at(f, (uint8_t *)block->shadow_bmap, bitmap_size, block->bitmap_offset);
+ }
+}
+
/**
* ram_save_iterate: iterative stage for migration
*
@@ -3358,9 +3407,15 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
return ret;
}
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
- qemu_fflush(f);
- ram_transferred_add(8);
+ /*
+ * For fixed ram we don't want to pollute the migration stream with
+ * EOS flags.
+ */
+ if (!migrate_fixed_ram()) {
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+ qemu_fflush(f);
+ ram_transferred_add(8);
+ }
ret = qemu_file_get_error(f);
}
@@ -3405,7 +3460,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
pages = ram_find_and_save_block(rs);
/* no more blocks to sent */
if (pages == 0) {
- break;
+ if (migrate_fixed_ram()) {
+ ram_save_shadow_bmap(f);
+ }
+ break;
}
if (pages < 0) {
ret = pages;
@@ -3428,8 +3486,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
return ret;
}
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
- qemu_fflush(f);
+ if (!migrate_fixed_ram()) {
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+ qemu_fflush(f);
+ }
return 0;
}
@@ -240,6 +240,7 @@ static bool should_validate_capability(int capability)
/* Validate only new capabilities to keep compatibility. */
switch (capability) {
case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
+ case MIGRATION_CAPABILITY_FIXED_RAM:
return true;
default:
return false;
@@ -485,7 +485,7 @@
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'postcopy-ram',
+ 'compress', 'events', 'postcopy-ram', 'fixed-ram',
{ 'name': 'x-colo', 'features': [ 'unstable' ] },
'release-ram',
'block', 'return-path', 'pause-before-switchover', 'multifd',
Implement 'fixed-ram' feature. The core of the feature is to ensure that each ram page of the migration stream has a specific offset in the resulting migration stream. The reasons why we'd want such behavior are twofold: - When doing a 'fixed-ram' migration the resulting file will have a bounded size, since pages which are dirtied multiple times will always go to a fixed location in the file, rather than constantly being added to a sequential stream. This eliminates cases where a vm with, say, 1g of ram can result in a migration file that's 10s of GBs, provided that the workload constantly redirties memory. - It paves the way to implement DIO-enabled save/restore of the migration stream as the pages are ensured to be written at aligned offsets. The feature requires changing the stream format. First, a bitmap is introduced which tracks which pages have been written (i.e. are dirtied) during migration, and subsequently it is written into the resulting file, again at a fixed location for every ramblock. Zero pages are ignored as they'd be zero in the destination migration as well. With the changed format the data would look like the following: |name len|name|used_len|pc*|bitmap_size|pages_offset|bitmap|pages| * pc - refers to the page_size/mr->addr members, so newly added members begin from "bitmap_size". This layout is initialized during ram_save_setup, so instead of having a sequential stream of pages that follow the ramblock headers, the dirty pages for a ramblock follow its header. Since all pages have a fixed location, RAM_SAVE_FLAG_EOS is no longer generated on every migration iteration but there is effectively a single RAM_SAVE_FLAG_EOS right at the end.
Signed-off-by: Nikolay Borisov <nborisov@suse.com> --- include/exec/ramblock.h | 7 +++ migration/migration.c | 51 +++++++++++++++++++- migration/migration.h | 1 + migration/ram.c | 100 ++++++++++++++++++++++++++++++++-------- migration/savevm.c | 1 + qapi/migration.json | 2 +- 6 files changed, 140 insertions(+), 22 deletions(-)