Message ID | 20170817170133.30939-4-olaf@aepfle.de (mailing list archive) |
---|---|
State | New, archived |
On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
[...]
> diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
[...]
> +    /* No superpage in 1st 2MB due to VGA hole */
> +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> +

I don't quite get this. What about other holes such as MMIO?

[...]
> +static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
> +{
[...]
> +    if (xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns))

Style is wrong here and in some other places.

[...]
> +        if ( (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
> +              types[i] != XEN_DOMCTL_PFINFO_BROKEN) &&
> +             !pfn_is_populated(ctx, original_pfns[i]) )
> +        {
> +            rc = x86_hvm_allocate_pfn(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +            rc = pfn_set_populated(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +        }
> +    }

One potential issue I can see with your algorithm is, if the stream of
page info contains pages from different super pages, the risk of going
over memory limit is high (hence failing the migration).

Is my concern unfounded?
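The style complaint presumably refers to the libxc convention of padding
conditions with spaces inside the parentheses, which other parts of this
patch already follow; a minimal illustration, reusing a line from the hunk
quoted above:

    /* libxc style, as used elsewhere in the patch: */
    if ( xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
        return 0;

    /* Linux kernel style, as written in the hunk quoted above: */
    if (xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns))
        return 0;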
On Tue, Aug 22, Wei Liu wrote:

> On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
> > +    /* No superpage in 1st 2MB due to VGA hole */
> > +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> > +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> I don't quite get this. What about other holes such as MMIO?

This just copies what meminit_hvm does. Is there a way to know where the
MMIO hole is? Maybe I just missed the MMIO part. In the worst case I
think a superpage is allocated, which is later split into single pages.

> One potential issue I can see with your algorithm is, if the stream of
> page info contains pages from different super pages, the risk of going
> over memory limit is high (hence failing the migration).
>
> Is my concern unfounded?

In my testing I have seen the case of over-allocation. That's why I
implemented the freeing of unpopulated parts. It would be nice to know
how many pages are actually coming. I think this info is not available.

On the other side, the first iteration sends the pfns linearly. This is
when the allocation actually happens. So the over-allocation will only
trigger near the end, if a 1G range is allocated but only a few pages
will be stored into this range.

Olaf
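To make the allocate-then-trim idea concrete, here is a minimal sketch.
It is not the patch code itself: populate_then_trim, SP_1G_SHIFT and the
sent bitmap are illustrative stand-ins for x86_hvm_allocate_pfn,
SUPERPAGE_1GB_SHIFT and the allocated_pfns tracking in the patch; only
the two libxc calls are the real interfaces the patch uses.

    #include <stdbool.h>
    #include <xenctrl.h>

    #define SP_1G_SHIFT 18 /* 4k pages per 1G extent, like SUPERPAGE_1GB_SHIFT */

    /* Populate a whole 1G extent around 'pfn', then release every page in
     * it that the stream never sent.  The real patch defers the release
     * until the batch was scanned, and falls back to 2M/4k allocations
     * instead of failing outright. */
    static int populate_then_trim(xc_interface *xch, uint32_t domid,
                                  xen_pfn_t pfn, const bool *sent)
    {
        xen_pfn_t base = (pfn >> SP_1G_SHIFT) << SP_1G_SHIFT;
        xen_pfn_t extent = base;
        unsigned long i;

        /* One extent of order 18 == 1G; returns the number of extents done. */
        if ( xc_domain_populate_physmap(xch, domid, 1, SP_1G_SHIFT, 0,
                                        &extent) != 1 )
            return -1;

        /* Punch out every pfn in the extent that was not part of the stream. */
        for ( i = 0; i < (1UL << SP_1G_SHIFT); i++ )
        {
            xen_pfn_t victim = base + i;

            if ( !sent[i] &&
                 xc_domain_decrease_reservation_exact(xch, domid, 1, 0,
                                                      &victim) )
                return -1;
        }

        return 0;
    }

The over-allocation risk discussed above comes from the window between the
populate call and the trimming: until the unsent pages are released, the
domain holds the full 1G even if only a handful of its pages will ever
carry data.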
On Tue, Aug 22, Olaf Hering wrote:

> In my testing I have seen the case of over-allocation. That's why I
> implemented the freeing of unpopulated parts. It would be nice to know
> how many pages are actually coming. I think this info is not available.

If the receiving dom0 recognizes an over-allocation it must know how much
memory a domU is supposed to have. Perhaps there is a way to retrieve
this info.

An interesting case is ballooning during migration. Is the new amount of
pages per domU actually transferred to the receiving domU? If the domU is
ballooned up, the other side may see the incoming domU as over-allocated.
If it is ballooned down, pages may be missing. Was this ever considered?

Olaf
On Tue, Aug 22, 2017 at 05:53:25PM +0200, Olaf Hering wrote:
> On Tue, Aug 22, Wei Liu wrote:
>
> > On Thu, Aug 17, 2017 at 07:01:33PM +0200, Olaf Hering wrote:
> > > +    /* No superpage in 1st 2MB due to VGA hole */
> > > +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
> > > +    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
> > I don't quite get this. What about other holes such as MMIO?
>
> This just copies what meminit_hvm does. Is there a way to know where the
> MMIO hole is? Maybe I just missed the MMIO part. In the worst case I
> think a superpage is allocated, which is later split into single pages.

That's a bit different from migration. IIRC hvmloader is responsible for
shuffling pages around. I don't think that is applicable to migration.

> > One potential issue I can see with your algorithm is, if the stream of
> > page info contains pages from different super pages, the risk of going
> > over memory limit is high (hence failing the migration).
> >
> > Is my concern unfounded?
>
> In my testing I have seen the case of over-allocation. That's why I
> implemented the freeing of unpopulated parts. It would be nice to know
> how many pages are actually coming. I think this info is not available.

Not sure I follow. What do you mean by "how many pages are actually
coming"?

> On the other side, the first iteration sends the pfns linearly. This is
> when the allocation actually happens. So the over-allocation will only
> trigger near the end, if a 1G range is allocated but only a few pages
> will be stored into this range.

This could be making too many assumptions on the data stream.
On Wed, Aug 23, 2017 at 10:05:37AM +0200, Olaf Hering wrote:
> On Tue, Aug 22, Olaf Hering wrote:
>
> > In my testing I have seen the case of over-allocation. That's why I
> > implemented the freeing of unpopulated parts. It would be nice to know
> > how many pages are actually coming. I think this info is not available.
>
> If the receiving dom0 recognizes an over-allocation it must know how much
> memory a domU is supposed to have. Perhaps there is a way to retrieve
> this info.

Dom0 probably gets an error from Xen about failing to allocate memory,
but I'm not sure it can tell whether that is due to the domU trying to
use more than it should, or Xen itself being out of memory.

> An interesting case is ballooning during migration. Is the new amount of
> pages per domU actually transferred to the receiving domU? If the domU is
> ballooned up, the other side may see the incoming domU as over-allocated.
> If it is ballooned down, pages may be missing. Was this ever considered?

No, I don't think that's covered.
On Wed, Aug 23, Wei Liu wrote:

> On Tue, Aug 22, 2017 at 05:53:25PM +0200, Olaf Hering wrote:
> > In my testing I have seen the case of over-allocation. That's why I
> > implemented the freeing of unpopulated parts. It would be nice to know
> > how many pages are actually coming. I think this info is not available.
> Not sure I follow. What do you mean by "how many pages are actually
> coming"?

This meant the expected number of pages to populate. The value of
p2m_size does not represent the actual number of pages assigned to a
domU. This info is stored in getdomaininfo.max_pages, which is currently
not used by restore. I will see if using this value will avoid triggering
the over-allocation check.

> > On the other side, the first iteration sends the pfns linearly. This is
> > when the allocation actually happens. So the over-allocation will only
> > trigger near the end, if a 1G range is allocated but only a few pages
> > will be stored into this range.
> This could be making too many assumptions on the data stream.

With the usage of max_pages some assumptions can be avoided.

Olaf
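As a sketch of what using getdomaininfo.max_pages could look like: the
value is already reachable from the tools via xc_domain_getinfo(), which
reports it scaled to KiB in xc_dominfo_t.max_memkb. The helper below is
hypothetical and not part of the patch; only the libxc call and fields
are real.

    #include <xenctrl.h>

    /* Hypothetical helper: the domain's max_pages, or 0 on failure. */
    static unsigned long get_max_pages(xc_interface *xch, uint32_t domid)
    {
        xc_dominfo_t info;

        /* xc_domain_getinfo() returns the number of domains found,
         * starting at 'domid'; verify we got the one we asked for. */
        if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
             info.domid != domid )
            return 0;

        /* max_memkb is getdomaininfo.max_pages << (XC_PAGE_SHIFT - 10). */
        return info.max_memkb >> (XC_PAGE_SHIFT - 10);
    }

The restore path could then refuse to start a superpage allocation once
the running total of allocated pages approaches this limit, rather than
relying solely on the decrease-reservation fixup after the fact.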
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 5d78f461af..26c45fdd6d 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -139,6 +139,16 @@ struct xc_sr_restore_ops
      */
     int (*setup)(struct xc_sr_context *ctx);
 
+    /**
+     * Populate PFNs
+     *
+     * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+     * unpopulated subset.
+     */
+    int (*populate_pfns)(struct xc_sr_context *ctx, unsigned count,
+                         const xen_pfn_t *original_pfns, const uint32_t *types);
+
+
     /**
      * Process an individual record from the stream. The caller shall take
      * care of processing common records (e.g. END, PAGE_DATA).
@@ -336,6 +346,11 @@ struct xc_sr_context
                     /* HVM context blob. */
                     void *context;
                     size_t contextsz;
+
+                    /* Bitmap of currently allocated PFNs during restore. */
+                    struct xc_sr_bitmap attempted_1g;
+                    struct xc_sr_bitmap attempted_2m;
+                    struct xc_sr_bitmap allocated_pfns;
                 } restore;
             };
         } x86_hvm;
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index d53948e1a6..1f9fe25b8f 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -68,74 +68,6 @@ static int read_headers(struct xc_sr_context *ctx)
     return 0;
 }
 
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset. If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
-        *pfns = malloc(count * sizeof(*pfns));
-    unsigned i, nr_pfns = 0;
-    int rc = -1;
-
-    if ( !mfns || !pfns )
-    {
-        ERROR("Failed to allocate %zu bytes for populating the physmap",
-              2 * count * sizeof(*mfns));
-        goto err;
-    }
-
-    for ( i = 0; i < count; ++i )
-    {
-        if ( (!types || (types &&
-                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
-                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
-             !pfn_is_populated(ctx, original_pfns[i]) )
-        {
-            rc = pfn_set_populated(ctx, original_pfns[i]);
-            if ( rc )
-                goto err;
-            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
-            ++nr_pfns;
-        }
-    }
-
-    if ( nr_pfns )
-    {
-        rc = xc_domain_populate_physmap_exact(
-            xch, ctx->domid, nr_pfns, 0, 0, mfns);
-        if ( rc )
-        {
-            PERROR("Failed to populate physmap");
-            goto err;
-        }
-
-        for ( i = 0; i < nr_pfns; ++i )
-        {
-            if ( mfns[i] == INVALID_MFN )
-            {
-                ERROR("Populate physmap failed for pfn %u", i);
-                rc = -1;
-                goto err;
-            }
-
-            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
-        }
-    }
-
-    rc = 0;
-
- err:
-    free(pfns);
-    free(mfns);
-
-    return rc;
-}
-
 /*
  * Given a list of pfns, their types, and a block of page data from the
  * stream, populate and record their types, map the relevant subset and copy
@@ -161,7 +93,7 @@ static int process_page_data(struct xc_sr_context *ctx, unsigned count,
         goto err;
     }
 
-    rc = populate_pfns(ctx, count, pfns, types);
+    rc = ctx->restore.ops.populate_pfns(ctx, count, pfns, types);
     if ( rc )
     {
         ERROR("Failed to populate pfns for batch of %u pages", count);
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 1dca85354a..60454148db 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -135,6 +135,8 @@ static int x86_hvm_localise_page(struct xc_sr_context *ctx,
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned long bits;
 
     if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
     {
@@ -149,7 +151,30 @@ static int x86_hvm_setup(struct xc_sr_context *ctx)
         return -1;
     }
 
+    bm = &ctx->x86_hvm.restore.attempted_1g;
+    bits = (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+    bits = (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    bm = &ctx->x86_hvm.restore.allocated_pfns;
+    bits = ctx->restore.p2m_size + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    /* No superpage in 1st 2MB due to VGA hole */
+    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
+    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
+
     return 0;
+
+out:
+    ERROR("Unable to allocate memory for pfn bitmaps");
+    return -1;
 }
 
 /*
@@ -224,10 +249,164 @@ static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
 static int x86_hvm_cleanup(struct xc_sr_context *ctx)
 {
     free(ctx->x86_hvm.restore.context);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns);
 
     return 0;
 }
 
+/*
+ * Set a pfn as allocated, expanding the tracking structures if needed.
+ */
+static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+    {
+        ERROR("Failed to realloc allocated_pfns bitmap");
+        errno = ENOMEM;
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Attempt to allocate a superpage where the pfn resides.
+ */
+static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    bool success = false;
+    int rc = -1, done;
+    unsigned int order;
+    unsigned long i;
+    unsigned long stat_1g = 0, stat_2m = 0, stat_4k = 0;
+    unsigned long idx_1g, idx_2m;
+    unsigned long count;
+    xen_pfn_t base_pfn = 0, extnt;
+
+    if (xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns))
+        return 0;
+
+    idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+    idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+    if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g))
+    {
+        PERROR("Failed to realloc attempted_1g");
+        return -1;
+    }
+    if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m))
+    {
+        PERROR("Failed to realloc attempted_2m");
+        return -1;
+    }
+    DPRINTF("idx_1g %lu idx_2m %lu\n", idx_1g, idx_2m);
+    if (!xc_sr_test_and_set_bit(idx_1g, &ctx->x86_hvm.restore.attempted_1g)) {
+        order = SUPERPAGE_1GB_SHIFT;
+        count = 1UL << order;
+        base_pfn = (pfn >> order) << order;
+        extnt = base_pfn;
+        done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
+        DPRINTF("1G base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
+        if (done > 0) {
+            struct xc_sr_bitmap *bm = &ctx->x86_hvm.restore.attempted_2m;
+            success = true;
+            stat_1g = done;
+            for (i = 0; i < (count >> SUPERPAGE_2MB_SHIFT); i++)
+                xc_sr_set_bit((base_pfn >> SUPERPAGE_2MB_SHIFT) + i, bm);
+        }
+    }
+
+    if (!xc_sr_test_and_set_bit(idx_2m, &ctx->x86_hvm.restore.attempted_2m)) {
+        order = SUPERPAGE_2MB_SHIFT;
+        count = 1UL << order;
+        base_pfn = (pfn >> order) << order;
+        extnt = base_pfn;
+        done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extnt);
+        DPRINTF("2M base_pfn %" PRI_xen_pfn " done %d\n", base_pfn, done);
+        if (done > 0) {
+            success = true;
+            stat_2m = done;
+        }
+    }
+    if (success == false) {
+        count = 1;
+        extnt = base_pfn = pfn;
+        done = xc_domain_populate_physmap(xch, ctx->domid, count, 0, 0, &extnt);
+        if (done > 0) {
+            success = true;
+            stat_4k = count;
+        }
+    }
+    DPRINTF("count %lu 1G %lu 2M %lu 4k %lu\n", count, stat_1g, stat_2m, stat_4k);
+    if (success == true) {
+        do {
+            count--;
+            rc = pfn_set_allocated(ctx, base_pfn + count);
+            if (rc)
+                break;
+        } while (count);
+    }
+    return rc;
+}
+
+static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                 const xen_pfn_t *original_pfns,
+                                 const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t min_pfn = original_pfns[0], max_pfn = original_pfns[0];
+    unsigned i;
+    int rc = -1;
+
+    for ( i = 0; i < count; ++i )
+    {
+        if (original_pfns[i] < min_pfn)
+            min_pfn = original_pfns[i];
+        if (original_pfns[i] > max_pfn)
+            max_pfn = original_pfns[i];
+        if ( (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+              types[i] != XEN_DOMCTL_PFINFO_BROKEN) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = x86_hvm_allocate_pfn(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+        }
+    }
+
+    while (min_pfn < max_pfn)
+    {
+        if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.allocated_pfns, min_pfn))
+        {
+            PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, min_pfn);
+            goto err;
+        }
+        if (!pfn_is_populated(ctx, min_pfn) &&
+            xc_sr_test_and_clear_bit(min_pfn, &ctx->x86_hvm.restore.allocated_pfns)) {
+            xen_pfn_t pfn = min_pfn;
+            rc = xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &pfn);
+            if ( rc )
+            {
+                PERROR("Failed to release pfn %" PRI_xen_pfn, min_pfn);
+                goto err;
+            }
+        }
+        min_pfn++;
+    }
+
+    rc = 0;
+
+ err:
+    return rc;
+}
+
+
 struct xc_sr_restore_ops restore_ops_x86_hvm =
 {
     .pfn_is_valid    = x86_hvm_pfn_is_valid,
@@ -236,6 +415,7 @@ struct xc_sr_restore_ops restore_ops_x86_hvm =
     .set_page_type   = x86_hvm_set_page_type,
     .localise_page   = x86_hvm_localise_page,
     .setup           = x86_hvm_setup,
+    .populate_pfns   = x86_hvm_populate_pfns,
     .process_record  = x86_hvm_process_record,
     .stream_complete = x86_hvm_stream_complete,
     .cleanup         = x86_hvm_cleanup,
diff --git a/tools/libxc/xc_sr_restore_x86_pv.c b/tools/libxc/xc_sr_restore_x86_pv.c
index 50e25c162c..87957559bc 100644
--- a/tools/libxc/xc_sr_restore_x86_pv.c
+++ b/tools/libxc/xc_sr_restore_x86_pv.c
@@ -936,6 +936,75 @@ static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
         ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
 }
 
+/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset. If types is NULL, no page type checking is performed
+ * and all unpopulated pfns are populated.
+ */
+static int x86_pv_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                const xen_pfn_t *original_pfns,
+                                const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
+        *pfns = malloc(count * sizeof(*pfns));
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (!types || (types &&
+                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(
+            xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( mfns[i] == INVALID_MFN )
+            {
+                ERROR("Populate physmap failed for pfn %u", i);
+                rc = -1;
+                goto err;
+            }
+
+            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
 /*
  * restore_ops function. Convert pfns back to mfns in pagetables. Possibly
  * needs to populate new frames if a PTE is found referring to a frame which
@@ -980,7 +1049,7 @@ static int x86_pv_localise_page(struct xc_sr_context *ctx,
         }
     }
 
-    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
+    if ( to_populate && x86_pv_populate_pfns(ctx, to_populate, pfns, NULL) )
         return -1;
 
     for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
@@ -1160,6 +1229,7 @@ struct xc_sr_restore_ops restore_ops_x86_pv =
     .set_gfn         = x86_pv_set_gfn,
    .localise_page   = x86_pv_localise_page,
     .setup           = x86_pv_setup,
+    .populate_pfns   = x86_pv_populate_pfns,
     .process_record  = x86_pv_process_record,
     .stream_complete = x86_pv_stream_complete,
     .cleanup         = x86_pv_cleanup,
During creation of an HVM domU, meminit_hvm() tries to map superpages.
After save/restore or migration this mapping is lost and everything is
allocated in single pages. This causes a performance degradation after
migration.

Add the necessary code to preallocate a superpage for the chunk of pfns
that is received. In case a pfn was not populated on the sending side,
it must be freed on the receiving side to avoid over-allocation.

The existing code for x86_pv is moved unmodified into its own file.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
 tools/libxc/xc_sr_common.h          |  15 +++
 tools/libxc/xc_sr_restore.c         |  70 +-------------
 tools/libxc/xc_sr_restore_x86_hvm.c | 180 ++++++++++++++++++++++++++++++++++++
 tools/libxc/xc_sr_restore_x86_pv.c  |  72 ++++++++++++++-
 4 files changed, 267 insertions(+), 70 deletions(-)