Message ID | 1495539071-12995-8-git-send-email-a.perevalov@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: > This patch adds ability to track down already copied > pages, it's necessary for calculation vCPU block time in > postcopy migration feature and maybe for restore after > postcopy migration failure. > > Functions which work with RAMBlock are placed into ram.c, > due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - > hardware independed code. > > Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> > --- > include/migration/migration.h | 16 +++++++++++ > migration/postcopy-ram.c | 22 ++++++++++++--- > migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 97 insertions(+), 4 deletions(-) > > diff --git a/include/migration/migration.h b/include/migration/migration.h > index 449cb07..4e05c83 100644 > --- a/include/migration/migration.h > +++ b/include/migration/migration.h > @@ -101,6 +101,20 @@ struct MigrationIncomingState { > LoadStateEntry_Head loadvm_handlers; > > /* > + * bitmap indicates whether page copied, > + * based on ramblock offset > + * now it is using only for blocktime calculation in > + * postcopy migration, so livetime of this entry: > + * since user requested blocktime calculation, > + * till the end of postcopy migration > + * as an example it could represend following memory map > + * ___________________________________ > + * |4k pages | hugepages | 4k pages Can we really do this? The problem is AFAIU migration stream is sending pages only in target page size, even for huge pages... so even for huge pages we should still need per TARGET_PAGE_SIZE bitmap? (Please refer to ram_state_init() on init of RAMBlock.bmap) > + * > + * */ > + unsigned long *copied_pages; > + > + /* > * PostcopyBlocktimeContext to keep information for postcopy > * live migration, to calculate vCPU block time > * */ > @@ -279,6 +293,8 @@ int migrate_compress_threads(void); > int migrate_decompress_threads(void); > bool migrate_use_events(void); > bool migrate_postcopy_blocktime(void); > +unsigned long int get_copied_bit_offset(ram_addr_t addr); > +unsigned long int *copied_pages_bitmap_new(void); > > /* Sending on the return path - generic and then for each message type */ > void migrate_send_rp_message(MigrationIncomingState *mis, > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index 5435a40..d647769 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -94,23 +94,34 @@ static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) > > static void postcopy_migration_cb(Notifier *n, void *data) > { > - PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, > - postcopy_notifier); > MigrationState *s = data; > if (migration_has_finished(s) || migration_has_failed(s)) { > + MigrationIncomingState *mis = migration_incoming_get_current(); > + PostcopyBlocktimeContext *ctx = mis->blocktime_ctx; > + > + if (!ctx) { > + return; > + } If we are in postcopy_migration_cb() already, then we have registered the notifiers, then... could this (!ctx) really happen? > + > g_free(ctx->page_fault_vcpu_time); > /* g_free is NULL robust */ > ctx->page_fault_vcpu_time = NULL; > g_free(ctx->vcpu_addr); > ctx->vcpu_addr = NULL; > + g_free(mis->copied_pages); > + mis->copied_pages = NULL; > } > } > > static void migration_exit_cb(Notifier *n, void *data) > { > - PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, > - exit_notifier); > + MigrationIncomingState *mis = migration_incoming_get_current(); > + PostcopyBlocktimeContext *ctx = mis->blocktime_ctx; > + if (!ctx) { > + return; > + } I failed to find out why touched this up... Thanks, > destroy_blocktime_context(ctx); > + mis->blocktime_ctx = NULL; > } > > static struct PostcopyBlocktimeContext *blocktime_context_new(void) > @@ -227,6 +238,9 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) > mis->blocktime_ctx = blocktime_context_new(); > } > > + if (!mis->copied_pages) { > + mis->copied_pages = copied_pages_bitmap_new(); > + } > asked_features |= UFFD_FEATURE_THREAD_ID; > } > #endif > diff --git a/migration/ram.c b/migration/ram.c > index f59fdd4..1abb6bb 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -2661,6 +2661,69 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > return ret; > } > > +static unsigned long get_total_bits_per_page(ram_addr_t mem_length, > + size_t page_size) > +{ > + unsigned long page_size_bit = find_last_bit((unsigned long *)&page_size, > + BITS_PER_LONG); > + unsigned long total_bits = mem_length >> page_size_bit; > + if (mem_length % page_size) { > + total_bits += 1; > + } > + return total_bits; > +} > + > +/* > + * this function allocates bitmap for copied pages, > + * also it calculates > + * how many entries do we need > + * */ > +unsigned long int *copied_pages_bitmap_new(void) > +{ > + RAMBlock *block; > + unsigned long int total_bits = 0; > + > + rcu_read_lock(); > + RAMBLOCK_FOREACH(block) { > + /* in general case used_length may not be aligned > + * by page_size */ > + > + total_bits += get_total_bits_per_page(block->used_length, > + block->page_size); > + } > + rcu_read_unlock(); > + > + return bitmap_new(total_bits); > +} > + > +unsigned long int get_copied_bit_offset(ram_addr_t addr) > +{ > + RAMBlock *block; > + unsigned long int iter_bit = 0; > + > + rcu_read_lock(); > + RAMBLOCK_FOREACH(block) { > + /* in general case used_length may not be aligned > + * by page_size */ > + if (block->host == NULL) { > + continue; > + } > + if (addr - (ram_addr_t)block->host < block->max_length) { > + unsigned long page_size_bit = find_last_bit( > + (unsigned long *)&block->page_size, > + BITS_PER_LONG); > + ram_addr_t offset = addr - (ram_addr_t)block->host; > + iter_bit += offset >> page_size_bit; > + break; > + } > + iter_bit += get_total_bits_per_page(block->used_length, > + block->page_size); > + } > + rcu_read_unlock(); > + > + return iter_bit; > +} > + > static SaveVMHandlers savevm_ram_handlers = { > .save_live_setup = ram_save_setup, > .save_live_iterate = ram_save_iterate, > -- > 1.8.3.1 >
On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: > On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: > > This patch adds ability to track down already copied > > pages, it's necessary for calculation vCPU block time in > > postcopy migration feature and maybe for restore after > > postcopy migration failure. > > > > Functions which work with RAMBlock are placed into ram.c, > > due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - > > hardware independed code. > > > > Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> > > --- > > include/migration/migration.h | 16 +++++++++++ > > migration/postcopy-ram.c | 22 ++++++++++++--- > > migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 97 insertions(+), 4 deletions(-) > > > > diff --git a/include/migration/migration.h b/include/migration/migration.h > > index 449cb07..4e05c83 100644 > > --- a/include/migration/migration.h > > +++ b/include/migration/migration.h > > @@ -101,6 +101,20 @@ struct MigrationIncomingState { > > LoadStateEntry_Head loadvm_handlers; > > > > /* > > + * bitmap indicates whether page copied, > > + * based on ramblock offset > > + * now it is using only for blocktime calculation in > > + * postcopy migration, so livetime of this entry: > > + * since user requested blocktime calculation, > > + * till the end of postcopy migration > > + * as an example it could represend following memory map > > + * ___________________________________ > > + * |4k pages | hugepages | 4k pages > > Can we really do this? > > The problem is AFAIU migration stream is sending pages only in target > page size, even for huge pages... so even for huge pages we should > still need per TARGET_PAGE_SIZE bitmap? sending maybe, but copying to userfault fd is doing in hugepage size in case of hugetlbfs memory backend. > > (Please refer to ram_state_init() on init of RAMBlock.bmap) I thought to use bitmap per ramblock, but I decided to not do it, because of following reasons: 1. There is only 4k ramblocks, for such ramblock it's not optimal 2. copied pages bitmap - it's attribute of incoming migration, but ramblock it's general structure, although bmap it's about dirty pages of precopy migrations, hmm, but RAMBlock also contains unsentmap and it's for postcopy. > > > + * > > + * */ > > + unsigned long *copied_pages; > > + > > + /* > > * PostcopyBlocktimeContext to keep information for postcopy > > * live migration, to calculate vCPU block time > > * */ > > @@ -279,6 +293,8 @@ int migrate_compress_threads(void); > > int migrate_decompress_threads(void); > > bool migrate_use_events(void); > > bool migrate_postcopy_blocktime(void); > > +unsigned long int get_copied_bit_offset(ram_addr_t addr); > > +unsigned long int *copied_pages_bitmap_new(void); > > > > /* Sending on the return path - generic and then for each message type */ > > void migrate_send_rp_message(MigrationIncomingState *mis, > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > > index 5435a40..d647769 100644 > > --- a/migration/postcopy-ram.c > > +++ b/migration/postcopy-ram.c > > @@ -94,23 +94,34 @@ static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) > > > > static void postcopy_migration_cb(Notifier *n, void *data) > > { > > - PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, > > - postcopy_notifier); > > MigrationState *s = data; > > if (migration_has_finished(s) || migration_has_failed(s)) { > > + MigrationIncomingState *mis = migration_incoming_get_current(); > > + PostcopyBlocktimeContext *ctx = mis->blocktime_ctx; > > + > > + if (!ctx) { > > + return; > > + } > > If we are in postcopy_migration_cb() already, then we have registered > the notifiers, then... could this (!ctx) really happen? > ok, maybe it's unnecessary sanity check. > > + > > g_free(ctx->page_fault_vcpu_time); > > /* g_free is NULL robust */ > > ctx->page_fault_vcpu_time = NULL; > > g_free(ctx->vcpu_addr); > > ctx->vcpu_addr = NULL; > > + g_free(mis->copied_pages); > > + mis->copied_pages = NULL; > > } > > } > > > > static void migration_exit_cb(Notifier *n, void *data) > > { > > - PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, > > - exit_notifier); > > + MigrationIncomingState *mis = migration_incoming_get_current(); > > + PostcopyBlocktimeContext *ctx = mis->blocktime_ctx; > > + if (!ctx) { > > + return; > > + } > > I failed to find out why touched this up... > > Thanks, > > > destroy_blocktime_context(ctx); > > + mis->blocktime_ctx = NULL; > > } > > > > static struct PostcopyBlocktimeContext *blocktime_context_new(void) > > @@ -227,6 +238,9 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) > > mis->blocktime_ctx = blocktime_context_new(); > > } > > > > + if (!mis->copied_pages) { > > + mis->copied_pages = copied_pages_bitmap_new(); > > + } > > asked_features |= UFFD_FEATURE_THREAD_ID; > > } > > #endif > > diff --git a/migration/ram.c b/migration/ram.c > > index f59fdd4..1abb6bb 100644 > > --- a/migration/ram.c > > +++ b/migration/ram.c > > @@ -2661,6 +2661,69 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) > > return ret; > > } > > > > +static unsigned long get_total_bits_per_page(ram_addr_t mem_length, > > + size_t page_size) > > +{ > > + unsigned long page_size_bit = find_last_bit((unsigned long *)&page_size, > > + BITS_PER_LONG); > > + unsigned long total_bits = mem_length >> page_size_bit; > > + if (mem_length % page_size) { > > + total_bits += 1; > > + } > > + return total_bits; > > +} > > + > > +/* > > + * this function allocates bitmap for copied pages, > > + * also it calculates > > + * how many entries do we need > > + * */ > > +unsigned long int *copied_pages_bitmap_new(void) > > +{ > > + RAMBlock *block; > > + unsigned long int total_bits = 0; > > + > > + rcu_read_lock(); > > + RAMBLOCK_FOREACH(block) { > > + /* in general case used_length may not be aligned > > + * by page_size */ > > + > > + total_bits += get_total_bits_per_page(block->used_length, > > + block->page_size); > > + } > > + rcu_read_unlock(); > > + > > + return bitmap_new(total_bits); > > +} > > + > > +unsigned long int get_copied_bit_offset(ram_addr_t addr) > > +{ > > + RAMBlock *block; > > + unsigned long int iter_bit = 0; > > + > > + rcu_read_lock(); > > + RAMBLOCK_FOREACH(block) { > > + /* in general case used_length may not be aligned > > + * by page_size */ > > + if (block->host == NULL) { > > + continue; > > + } > > + if (addr - (ram_addr_t)block->host < block->max_length) { > > + unsigned long page_size_bit = find_last_bit( > > + (unsigned long *)&block->page_size, > > + BITS_PER_LONG); > > + ram_addr_t offset = addr - (ram_addr_t)block->host; > > + iter_bit += offset >> page_size_bit; > > + break; > > + } > > + iter_bit += get_total_bits_per_page(block->used_length, > > + block->page_size); > > + } > > + rcu_read_unlock(); > > + > > + return iter_bit; > > +} > > + > > static SaveVMHandlers savevm_ram_handlers = { > > .save_live_setup = ram_save_setup, > > .save_live_iterate = ram_save_iterate, > > -- > > 1.8.3.1 > > > > -- > Peter Xu >
On Wed, May 24, 2017 at 10:56:37AM +0300, Alexey wrote: > On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: > > On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: > > > This patch adds ability to track down already copied > > > pages, it's necessary for calculation vCPU block time in > > > postcopy migration feature and maybe for restore after > > > postcopy migration failure. > > > > > > Functions which work with RAMBlock are placed into ram.c, > > > due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - > > > hardware independed code. > > > > > > Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> > > > --- > > > include/migration/migration.h | 16 +++++++++++ > > > migration/postcopy-ram.c | 22 ++++++++++++--- > > > migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ > > > 3 files changed, 97 insertions(+), 4 deletions(-) > > > > > > diff --git a/include/migration/migration.h b/include/migration/migration.h > > > index 449cb07..4e05c83 100644 > > > --- a/include/migration/migration.h > > > +++ b/include/migration/migration.h > > > @@ -101,6 +101,20 @@ struct MigrationIncomingState { > > > LoadStateEntry_Head loadvm_handlers; > > > > > > /* > > > + * bitmap indicates whether page copied, > > > + * based on ramblock offset > > > + * now it is using only for blocktime calculation in > > > + * postcopy migration, so livetime of this entry: > > > + * since user requested blocktime calculation, > > > + * till the end of postcopy migration > > > + * as an example it could represend following memory map > > > + * ___________________________________ > > > + * |4k pages | hugepages | 4k pages > > > > Can we really do this? > > > > > The problem is AFAIU migration stream is sending pages only in target > > page size, even for huge pages... so even for huge pages we should > > still need per TARGET_PAGE_SIZE bitmap? > sending maybe, but copying to userfault fd is doing in hugepage size Yes you are right. :) > in case of hugetlbfs memory backend. > > > > (Please refer to ram_state_init() on init of RAMBlock.bmap) > I thought to use bitmap per ramblock, but I decided to not do it, > because of following reasons: > 1. There is only 4k ramblocks, for such ramblock it's not > optimal Could you explain what do you mean by "there is only 4k ramblocks"? > 2. copied pages bitmap - it's attribute of incoming migration, > but ramblock it's general structure, although bmap it's about > dirty pages of precopy migrations, hmm, but RAMBlock also > contains unsentmap and it's for postcopy. I thought you mean that copied pages are specific fields for incoming so not suitable for RAMBlock struct? But the last sentence seems to not support that idea... Hmm... Actually it sounds like a good idea to me to put it into RAMBlock (but I maybe wrong). Thanks,
On 05/24/2017 03:01 PM, Peter Xu wrote: > On Wed, May 24, 2017 at 10:56:37AM +0300, Alexey wrote: >> On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: >>> On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: >>>> This patch adds ability to track down already copied >>>> pages, it's necessary for calculation vCPU block time in >>>> postcopy migration feature and maybe for restore after >>>> postcopy migration failure. >>>> >>>> Functions which work with RAMBlock are placed into ram.c, >>>> due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - >>>> hardware independed code. >>>> >>>> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> >>>> --- >>>> include/migration/migration.h | 16 +++++++++++ >>>> migration/postcopy-ram.c | 22 ++++++++++++--- >>>> migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ >>>> 3 files changed, 97 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/include/migration/migration.h b/include/migration/migration.h >>>> index 449cb07..4e05c83 100644 >>>> --- a/include/migration/migration.h >>>> +++ b/include/migration/migration.h >>>> @@ -101,6 +101,20 @@ struct MigrationIncomingState { >>>> LoadStateEntry_Head loadvm_handlers; >>>> >>>> /* >>>> + * bitmap indicates whether page copied, >>>> + * based on ramblock offset >>>> + * now it is using only for blocktime calculation in >>>> + * postcopy migration, so livetime of this entry: >>>> + * since user requested blocktime calculation, >>>> + * till the end of postcopy migration >>>> + * as an example it could represend following memory map >>>> + * ___________________________________ >>>> + * |4k pages | hugepages | 4k pages >>> Can we really do this? >>> The problem is AFAIU migration stream is sending pages only in target >>> page size, even for huge pages... so even for huge pages we should >>> still need per TARGET_PAGE_SIZE bitmap? >> sending maybe, but copying to userfault fd is doing in hugepage size > Yes you are right. :) > >> in case of hugetlbfs memory backend. >>> (Please refer to ram_state_init() on init of RAMBlock.bmap) >> I thought to use bitmap per ramblock, but I decided to not do it, >> because of following reasons: >> 1. There is only 4k ramblocks, for such ramblock it's not >> optimal > Could you explain what do you mean by "there is only 4k ramblocks"? sorry, could be ramblocks with 4k size, as example, your's qemu hmp info ramblock shows Block Name PSize Offset Used Total /objects/mem 2 MiB 0x0000000000000000 0x0000000020000000 0x0000000020000000 vga.vram 4 KiB 0x0000000020060000 0x0000000001000000 0x0000000001000000 /rom@etc/acpi/tables 4 KiB 0x0000000021130000 0x0000000000020000 0x0000000000200000 pc.bios 4 KiB 0x0000000020000000 0x0000000000040000 0x0000000000040000 0000:00:03.0/e1000.rom 4 KiB 0x0000000021070000 0x0000000000040000 0x0000000000040000 0000:00:04.0/e1000.rom 4 KiB 0x00000000210b0000 0x0000000000040000 0x0000000000040000 0000:00:05.0/e1000.rom 4 KiB 0x00000000210f0000 0x0000000000040000 0x0000000000040000 pc.rom 4 KiB 0x0000000020040000 0x0000000000020000 0x0000000000020000 0000:00:02.0/vga.rom 4 KiB 0x0000000021060000 0x0000000000010000 0x0000000000010000 /rom@etc/table-loader 4 KiB 0x0000000021330000 0x0000000000001000 0x0000000000001000 /rom@etc/acpi/rsdp 4 KiB 0x0000000021331000 0x0000000000001000 0x0000000000001000 /rom@etc/table-loader and /rom@etc/acpi/rsdp has only one 4K page. > >> 2. copied pages bitmap - it's attribute of incoming migration, >> but ramblock it's general structure, although bmap it's about >> dirty pages of precopy migrations, hmm, but RAMBlock also >> contains unsentmap and it's for postcopy. > I thought you mean that copied pages are specific fields for incoming > so not suitable for RAMBlock struct? But the last sentence seems to > not support that idea... Hmm... Actually it sounds like a good idea to > me to put it into RAMBlock (but I maybe wrong). > > Thanks, >
On Wed, May 24, 2017 at 03:16:23PM +0300, Alexey Perevalov wrote: > On 05/24/2017 03:01 PM, Peter Xu wrote: > >On Wed, May 24, 2017 at 10:56:37AM +0300, Alexey wrote: > >>On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: > >>>On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: > >>>>This patch adds ability to track down already copied > >>>>pages, it's necessary for calculation vCPU block time in > >>>>postcopy migration feature and maybe for restore after > >>>>postcopy migration failure. > >>>> > >>>>Functions which work with RAMBlock are placed into ram.c, > >>>>due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - > >>>>hardware independed code. > >>>> > >>>>Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> > >>>>--- > >>>> include/migration/migration.h | 16 +++++++++++ > >>>> migration/postcopy-ram.c | 22 ++++++++++++--- > >>>> migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ > >>>> 3 files changed, 97 insertions(+), 4 deletions(-) > >>>> > >>>>diff --git a/include/migration/migration.h b/include/migration/migration.h > >>>>index 449cb07..4e05c83 100644 > >>>>--- a/include/migration/migration.h > >>>>+++ b/include/migration/migration.h > >>>>@@ -101,6 +101,20 @@ struct MigrationIncomingState { > >>>> LoadStateEntry_Head loadvm_handlers; > >>>> /* > >>>>+ * bitmap indicates whether page copied, > >>>>+ * based on ramblock offset > >>>>+ * now it is using only for blocktime calculation in > >>>>+ * postcopy migration, so livetime of this entry: > >>>>+ * since user requested blocktime calculation, > >>>>+ * till the end of postcopy migration > >>>>+ * as an example it could represend following memory map > >>>>+ * ___________________________________ > >>>>+ * |4k pages | hugepages | 4k pages > >>>Can we really do this? > >>>The problem is AFAIU migration stream is sending pages only in target > >>>page size, even for huge pages... so even for huge pages we should > >>>still need per TARGET_PAGE_SIZE bitmap? > >>sending maybe, but copying to userfault fd is doing in hugepage size > >Yes you are right. :) > > > >>in case of hugetlbfs memory backend. > >>>(Please refer to ram_state_init() on init of RAMBlock.bmap) > >>I thought to use bitmap per ramblock, but I decided to not do it, > >>because of following reasons: > >> 1. There is only 4k ramblocks, for such ramblock it's not > >> optimal > >Could you explain what do you mean by "there is only 4k ramblocks"? > sorry, could be ramblocks with 4k size, > as example, your's qemu hmp info ramblock shows > Block Name PSize Offset Used Total > /objects/mem 2 MiB 0x0000000000000000 0x0000000020000000 > 0x0000000020000000 > vga.vram 4 KiB 0x0000000020060000 0x0000000001000000 > 0x0000000001000000 > /rom@etc/acpi/tables 4 KiB 0x0000000021130000 0x0000000000020000 > 0x0000000000200000 > pc.bios 4 KiB 0x0000000020000000 0x0000000000040000 > 0x0000000000040000 > 0000:00:03.0/e1000.rom 4 KiB 0x0000000021070000 0x0000000000040000 > 0x0000000000040000 > 0000:00:04.0/e1000.rom 4 KiB 0x00000000210b0000 0x0000000000040000 > 0x0000000000040000 > 0000:00:05.0/e1000.rom 4 KiB 0x00000000210f0000 0x0000000000040000 > 0x0000000000040000 > pc.rom 4 KiB 0x0000000020040000 0x0000000000020000 > 0x0000000000020000 > 0000:00:02.0/vga.rom 4 KiB 0x0000000021060000 0x0000000000010000 > 0x0000000000010000 > /rom@etc/table-loader 4 KiB 0x0000000021330000 0x0000000000001000 > 0x0000000000001000 > /rom@etc/acpi/rsdp 4 KiB 0x0000000021331000 0x0000000000001000 > 0x0000000000001000 > > /rom@etc/table-loader and /rom@etc/acpi/rsdp has only one 4K page. I see. Thanks for explaining this. A 4k sized ramblock means the bitmap would be only one unsigned long width (via bitmap_new(1)). I still don't see why it's not good... :-)
On 05/25/2017 02:30 AM, Peter Xu wrote: > On Wed, May 24, 2017 at 03:16:23PM +0300, Alexey Perevalov wrote: >> On 05/24/2017 03:01 PM, Peter Xu wrote: >>> On Wed, May 24, 2017 at 10:56:37AM +0300, Alexey wrote: >>>> On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: >>>>> On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: >>>>>> This patch adds ability to track down already copied >>>>>> pages, it's necessary for calculation vCPU block time in >>>>>> postcopy migration feature and maybe for restore after >>>>>> postcopy migration failure. >>>>>> >>>>>> Functions which work with RAMBlock are placed into ram.c, >>>>>> due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - >>>>>> hardware independed code. >>>>>> >>>>>> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> >>>>>> --- >>>>>> include/migration/migration.h | 16 +++++++++++ >>>>>> migration/postcopy-ram.c | 22 ++++++++++++--- >>>>>> migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ >>>>>> 3 files changed, 97 insertions(+), 4 deletions(-) >>>>>> >>>>>> diff --git a/include/migration/migration.h b/include/migration/migration.h >>>>>> index 449cb07..4e05c83 100644 >>>>>> --- a/include/migration/migration.h >>>>>> +++ b/include/migration/migration.h >>>>>> @@ -101,6 +101,20 @@ struct MigrationIncomingState { >>>>>> LoadStateEntry_Head loadvm_handlers; >>>>>> /* >>>>>> + * bitmap indicates whether page copied, >>>>>> + * based on ramblock offset >>>>>> + * now it is using only for blocktime calculation in >>>>>> + * postcopy migration, so livetime of this entry: >>>>>> + * since user requested blocktime calculation, >>>>>> + * till the end of postcopy migration >>>>>> + * as an example it could represend following memory map >>>>>> + * ___________________________________ >>>>>> + * |4k pages | hugepages | 4k pages >>>>> Can we really do this? >>>>> The problem is AFAIU migration stream is sending pages only in target >>>>> page size, even for huge pages... so even for huge pages we should >>>>> still need per TARGET_PAGE_SIZE bitmap? >>>> sending maybe, but copying to userfault fd is doing in hugepage size >>> Yes you are right. :) >>> >>>> in case of hugetlbfs memory backend. >>>>> (Please refer to ram_state_init() on init of RAMBlock.bmap) >>>> I thought to use bitmap per ramblock, but I decided to not do it, >>>> because of following reasons: >>>> 1. There is only 4k ramblocks, for such ramblock it's not >>>> optimal >>> Could you explain what do you mean by "there is only 4k ramblocks"? >> sorry, could be ramblocks with 4k size, >> as example, your's qemu hmp info ramblock shows >> Block Name PSize Offset Used Total >> /objects/mem 2 MiB 0x0000000000000000 0x0000000020000000 >> 0x0000000020000000 >> vga.vram 4 KiB 0x0000000020060000 0x0000000001000000 >> 0x0000000001000000 >> /rom@etc/acpi/tables 4 KiB 0x0000000021130000 0x0000000000020000 >> 0x0000000000200000 >> pc.bios 4 KiB 0x0000000020000000 0x0000000000040000 >> 0x0000000000040000 >> 0000:00:03.0/e1000.rom 4 KiB 0x0000000021070000 0x0000000000040000 >> 0x0000000000040000 >> 0000:00:04.0/e1000.rom 4 KiB 0x00000000210b0000 0x0000000000040000 >> 0x0000000000040000 >> 0000:00:05.0/e1000.rom 4 KiB 0x00000000210f0000 0x0000000000040000 >> 0x0000000000040000 >> pc.rom 4 KiB 0x0000000020040000 0x0000000000020000 >> 0x0000000000020000 >> 0000:00:02.0/vga.rom 4 KiB 0x0000000021060000 0x0000000000010000 >> 0x0000000000010000 >> /rom@etc/table-loader 4 KiB 0x0000000021330000 0x0000000000001000 >> 0x0000000000001000 >> /rom@etc/acpi/rsdp 4 KiB 0x0000000021331000 0x0000000000001000 >> 0x0000000000001000 >> >> /rom@etc/table-loader and /rom@etc/acpi/rsdp has only one 4K page. > I see. Thanks for explaining this. > > A 4k sized ramblock means the bitmap would be only one unsigned long > width (via bitmap_new(1)). I still don't see why it's not good... :-) > Ok, I'll take into account your comment, and I'll place copied bitmap into RAMBlock.
On Thu, May 25, 2017 at 09:28:57AM +0300, Alexey Perevalov wrote: > On 05/25/2017 02:30 AM, Peter Xu wrote: > >On Wed, May 24, 2017 at 03:16:23PM +0300, Alexey Perevalov wrote: > >>On 05/24/2017 03:01 PM, Peter Xu wrote: > >>>On Wed, May 24, 2017 at 10:56:37AM +0300, Alexey wrote: > >>>>On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: > >>>>>On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: > >>>>>>This patch adds ability to track down already copied > >>>>>>pages, it's necessary for calculation vCPU block time in > >>>>>>postcopy migration feature and maybe for restore after > >>>>>>postcopy migration failure. > >>>>>> > >>>>>>Functions which work with RAMBlock are placed into ram.c, > >>>>>>due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - > >>>>>>hardware independed code. > >>>>>> > >>>>>>Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> > >>>>>>--- > >>>>>> include/migration/migration.h | 16 +++++++++++ > >>>>>> migration/postcopy-ram.c | 22 ++++++++++++--- > >>>>>> migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ > >>>>>> 3 files changed, 97 insertions(+), 4 deletions(-) > >>>>>> > >>>>>>diff --git a/include/migration/migration.h b/include/migration/migration.h > >>>>>>index 449cb07..4e05c83 100644 > >>>>>>--- a/include/migration/migration.h > >>>>>>+++ b/include/migration/migration.h > >>>>>>@@ -101,6 +101,20 @@ struct MigrationIncomingState { > >>>>>> LoadStateEntry_Head loadvm_handlers; > >>>>>> /* > >>>>>>+ * bitmap indicates whether page copied, > >>>>>>+ * based on ramblock offset > >>>>>>+ * now it is using only for blocktime calculation in > >>>>>>+ * postcopy migration, so livetime of this entry: > >>>>>>+ * since user requested blocktime calculation, > >>>>>>+ * till the end of postcopy migration > >>>>>>+ * as an example it could represend following memory map > >>>>>>+ * ___________________________________ > >>>>>>+ * |4k pages | hugepages | 4k pages > >>>>>Can we really do this? > >>>>>The problem is AFAIU migration stream is sending pages only in target > >>>>>page size, even for huge pages... so even for huge pages we should > >>>>>still need per TARGET_PAGE_SIZE bitmap? > >>>>sending maybe, but copying to userfault fd is doing in hugepage size > >>>Yes you are right. :) > >>> > >>>>in case of hugetlbfs memory backend. > >>>>>(Please refer to ram_state_init() on init of RAMBlock.bmap) > >>>>I thought to use bitmap per ramblock, but I decided to not do it, > >>>>because of following reasons: > >>>> 1. There is only 4k ramblocks, for such ramblock it's not > >>>> optimal > >>>Could you explain what do you mean by "there is only 4k ramblocks"? > >>sorry, could be ramblocks with 4k size, > >>as example, your's qemu hmp info ramblock shows > >> Block Name PSize Offset Used Total > >> /objects/mem 2 MiB 0x0000000000000000 0x0000000020000000 > >>0x0000000020000000 > >> vga.vram 4 KiB 0x0000000020060000 0x0000000001000000 > >>0x0000000001000000 > >> /rom@etc/acpi/tables 4 KiB 0x0000000021130000 0x0000000000020000 > >>0x0000000000200000 > >> pc.bios 4 KiB 0x0000000020000000 0x0000000000040000 > >>0x0000000000040000 > >> 0000:00:03.0/e1000.rom 4 KiB 0x0000000021070000 0x0000000000040000 > >>0x0000000000040000 > >> 0000:00:04.0/e1000.rom 4 KiB 0x00000000210b0000 0x0000000000040000 > >>0x0000000000040000 > >> 0000:00:05.0/e1000.rom 4 KiB 0x00000000210f0000 0x0000000000040000 > >>0x0000000000040000 > >> pc.rom 4 KiB 0x0000000020040000 0x0000000000020000 > >>0x0000000000020000 > >> 0000:00:02.0/vga.rom 4 KiB 0x0000000021060000 0x0000000000010000 > >>0x0000000000010000 > >> /rom@etc/table-loader 4 KiB 0x0000000021330000 0x0000000000001000 > >>0x0000000000001000 > >> /rom@etc/acpi/rsdp 4 KiB 0x0000000021331000 0x0000000000001000 > >>0x0000000000001000 > >> > >>/rom@etc/table-loader and /rom@etc/acpi/rsdp has only one 4K page. > >I see. Thanks for explaining this. > > > >A 4k sized ramblock means the bitmap would be only one unsigned long > >width (via bitmap_new(1)). I still don't see why it's not good... :-) > > > Ok, I'll take into account your comment, and I'll place copied > > bitmap into RAMBlock. Or we can wait for further comments in case you'll do extra work. :-) Just to mention that Dave won't be here until next week, so you may need to wait for some days until he can provide some further input. Thanks,
* Peter Xu (peterx@redhat.com) wrote: > On Thu, May 25, 2017 at 09:28:57AM +0300, Alexey Perevalov wrote: > > On 05/25/2017 02:30 AM, Peter Xu wrote: > > >On Wed, May 24, 2017 at 03:16:23PM +0300, Alexey Perevalov wrote: > > >>On 05/24/2017 03:01 PM, Peter Xu wrote: > > >>>On Wed, May 24, 2017 at 10:56:37AM +0300, Alexey wrote: > > >>>>On Wed, May 24, 2017 at 02:57:36PM +0800, Peter Xu wrote: > > >>>>>On Tue, May 23, 2017 at 02:31:08PM +0300, Alexey Perevalov wrote: > > >>>>>>This patch adds ability to track down already copied > > >>>>>>pages, it's necessary for calculation vCPU block time in > > >>>>>>postcopy migration feature and maybe for restore after > > >>>>>>postcopy migration failure. > > >>>>>> > > >>>>>>Functions which work with RAMBlock are placed into ram.c, > > >>>>>>due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - > > >>>>>>hardware independed code. > > >>>>>> > > >>>>>>Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> > > >>>>>>--- > > >>>>>> include/migration/migration.h | 16 +++++++++++ > > >>>>>> migration/postcopy-ram.c | 22 ++++++++++++--- > > >>>>>> migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ > > >>>>>> 3 files changed, 97 insertions(+), 4 deletions(-) > > >>>>>> > > >>>>>>diff --git a/include/migration/migration.h b/include/migration/migration.h > > >>>>>>index 449cb07..4e05c83 100644 > > >>>>>>--- a/include/migration/migration.h > > >>>>>>+++ b/include/migration/migration.h > > >>>>>>@@ -101,6 +101,20 @@ struct MigrationIncomingState { > > >>>>>> LoadStateEntry_Head loadvm_handlers; > > >>>>>> /* > > >>>>>>+ * bitmap indicates whether page copied, > > >>>>>>+ * based on ramblock offset > > >>>>>>+ * now it is using only for blocktime calculation in > > >>>>>>+ * postcopy migration, so livetime of this entry: > > >>>>>>+ * since user requested blocktime calculation, > > >>>>>>+ * till the end of postcopy migration > > >>>>>>+ * as an example it could represend following memory map > > >>>>>>+ * ___________________________________ > > >>>>>>+ * |4k pages | hugepages | 4k pages > > >>>>>Can we really do this? > > >>>>>The problem is AFAIU migration stream is sending pages only in target > > >>>>>page size, even for huge pages... so even for huge pages we should > > >>>>>still need per TARGET_PAGE_SIZE bitmap? > > >>>>sending maybe, but copying to userfault fd is doing in hugepage size > > >>>Yes you are right. :) > > >>> > > >>>>in case of hugetlbfs memory backend. > > >>>>>(Please refer to ram_state_init() on init of RAMBlock.bmap) > > >>>>I thought to use bitmap per ramblock, but I decided to not do it, > > >>>>because of following reasons: > > >>>> 1. There is only 4k ramblocks, for such ramblock it's not > > >>>> optimal > > >>>Could you explain what do you mean by "there is only 4k ramblocks"? > > >>sorry, could be ramblocks with 4k size, > > >>as example, your's qemu hmp info ramblock shows > > >> Block Name PSize Offset Used Total > > >> /objects/mem 2 MiB 0x0000000000000000 0x0000000020000000 > > >>0x0000000020000000 > > >> vga.vram 4 KiB 0x0000000020060000 0x0000000001000000 > > >>0x0000000001000000 > > >> /rom@etc/acpi/tables 4 KiB 0x0000000021130000 0x0000000000020000 > > >>0x0000000000200000 > > >> pc.bios 4 KiB 0x0000000020000000 0x0000000000040000 > > >>0x0000000000040000 > > >> 0000:00:03.0/e1000.rom 4 KiB 0x0000000021070000 0x0000000000040000 > > >>0x0000000000040000 > > >> 0000:00:04.0/e1000.rom 4 KiB 0x00000000210b0000 0x0000000000040000 > > >>0x0000000000040000 > > >> 0000:00:05.0/e1000.rom 4 KiB 0x00000000210f0000 0x0000000000040000 > > >>0x0000000000040000 > > >> pc.rom 4 KiB 0x0000000020040000 0x0000000000020000 > > >>0x0000000000020000 > > >> 0000:00:02.0/vga.rom 4 KiB 0x0000000021060000 0x0000000000010000 > > >>0x0000000000010000 > > >> /rom@etc/table-loader 4 KiB 0x0000000021330000 0x0000000000001000 > > >>0x0000000000001000 > > >> /rom@etc/acpi/rsdp 4 KiB 0x0000000021331000 0x0000000000001000 > > >>0x0000000000001000 > > >> > > >>/rom@etc/table-loader and /rom@etc/acpi/rsdp has only one 4K page. > > >I see. Thanks for explaining this. > > > > > >A 4k sized ramblock means the bitmap would be only one unsigned long > > >width (via bitmap_new(1)). I still don't see why it's not good... :-) > > > > > Ok, I'll take into account your comment, and I'll place copied > > > > bitmap into RAMBlock. > > Or we can wait for further comments in case you'll do extra work. :-) > > Just to mention that Dave won't be here until next week, so you may > need to wait for some days until he can provide some further input. (Catches up with mail) So I think I agree with Peter here; if we allocate the bitmap in the same way as our other bitmaps it'll perhaps be easier for people to understand both of them. Especially now Juan has changed that to be individual bitmaps per RAMBlock rather than one big bitmap; it avoids having to work out offsets into the bitmap. Dave > Thanks, > > -- > Peter Xu -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/include/migration/migration.h b/include/migration/migration.h index 449cb07..4e05c83 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -101,6 +101,20 @@ struct MigrationIncomingState { LoadStateEntry_Head loadvm_handlers; /* + * bitmap indicates whether page copied, + * based on ramblock offset + * now it is using only for blocktime calculation in + * postcopy migration, so livetime of this entry: + * since user requested blocktime calculation, + * till the end of postcopy migration + * as an example it could represend following memory map + * ___________________________________ + * |4k pages | hugepages | 4k pages + * + * */ + unsigned long *copied_pages; + + /* * PostcopyBlocktimeContext to keep information for postcopy * live migration, to calculate vCPU block time * */ @@ -279,6 +293,8 @@ int migrate_compress_threads(void); int migrate_decompress_threads(void); bool migrate_use_events(void); bool migrate_postcopy_blocktime(void); +unsigned long int get_copied_bit_offset(ram_addr_t addr); +unsigned long int *copied_pages_bitmap_new(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_message(MigrationIncomingState *mis, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 5435a40..d647769 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -94,23 +94,34 @@ static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) static void postcopy_migration_cb(Notifier *n, void *data) { - PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, - postcopy_notifier); MigrationState *s = data; if (migration_has_finished(s) || migration_has_failed(s)) { + MigrationIncomingState *mis = migration_incoming_get_current(); + PostcopyBlocktimeContext *ctx = mis->blocktime_ctx; + + if (!ctx) { + return; + } + g_free(ctx->page_fault_vcpu_time); /* g_free is NULL robust */ ctx->page_fault_vcpu_time = NULL; g_free(ctx->vcpu_addr); ctx->vcpu_addr = NULL; + g_free(mis->copied_pages); + mis->copied_pages = NULL; } } static void migration_exit_cb(Notifier *n, void *data) { - PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, - exit_notifier); + MigrationIncomingState *mis = migration_incoming_get_current(); + PostcopyBlocktimeContext *ctx = mis->blocktime_ctx; + if (!ctx) { + return; + } destroy_blocktime_context(ctx); + mis->blocktime_ctx = NULL; } static struct PostcopyBlocktimeContext *blocktime_context_new(void) @@ -227,6 +238,9 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) mis->blocktime_ctx = blocktime_context_new(); } + if (!mis->copied_pages) { + mis->copied_pages = copied_pages_bitmap_new(); + } asked_features |= UFFD_FEATURE_THREAD_ID; } #endif diff --git a/migration/ram.c b/migration/ram.c index f59fdd4..1abb6bb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2661,6 +2661,69 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) return ret; } +static unsigned long get_total_bits_per_page(ram_addr_t mem_length, + size_t page_size) +{ + unsigned long page_size_bit = find_last_bit((unsigned long *)&page_size, + BITS_PER_LONG); + unsigned long total_bits = mem_length >> page_size_bit; + if (mem_length % page_size) { + total_bits += 1; + } + return total_bits; +} + +/* + * this function allocates bitmap for copied pages, + * also it calculates + * how many entries do we need + * */ +unsigned long int *copied_pages_bitmap_new(void) +{ + RAMBlock *block; + unsigned long int total_bits = 0; + + rcu_read_lock(); + RAMBLOCK_FOREACH(block) { + /* in general case used_length may not be aligned + * by page_size */ + + total_bits += get_total_bits_per_page(block->used_length, + block->page_size); + } + rcu_read_unlock(); + + return bitmap_new(total_bits); +} + +unsigned long int get_copied_bit_offset(ram_addr_t addr) +{ + RAMBlock *block; + unsigned long int iter_bit = 0; + + rcu_read_lock(); + RAMBLOCK_FOREACH(block) { + /* in general case used_length may not be aligned + * by page_size */ + if (block->host == NULL) { + continue; + } + if (addr - (ram_addr_t)block->host < block->max_length) { + unsigned long page_size_bit = find_last_bit( + (unsigned long *)&block->page_size, + BITS_PER_LONG); + ram_addr_t offset = addr - (ram_addr_t)block->host; + iter_bit += offset >> page_size_bit; + break; + } + iter_bit += get_total_bits_per_page(block->used_length, + block->page_size); + } + rcu_read_unlock(); + + return iter_bit; +} + static SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate,
This patch adds ability to track down already copied pages, it's necessary for calculation vCPU block time in postcopy migration feature and maybe for restore after postcopy migration failure. Functions which work with RAMBlock are placed into ram.c, due to TARGET_WORDS_BIGENDIAN is poisoned int postcopy-ram.c - hardware independed code. Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com> --- include/migration/migration.h | 16 +++++++++++ migration/postcopy-ram.c | 22 ++++++++++++--- migration/ram.c | 63 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 4 deletions(-)