@@ -107,6 +107,9 @@ struct xc_sr_save_ops
*/
struct xc_sr_restore_ops
{
+ /* Allocate an MFN for the given PFN. */
+ int (*allocate_pfn)(struct xc_sr_context *ctx, xen_pfn_t pfn);
+
/* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */
xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
@@ -331,6 +334,14 @@ struct xc_sr_context
/* HVM context blob. */
void *context;
size_t contextsz;
+
+ /* Scratch space for extent bases passed to populate_physmap. */
+ xen_pfn_t *sp_extents;
+ /* Bitmaps of 1GB/2MB regions for which an allocation was attempted. */
+ unsigned long *attempted_1g;
+ unsigned long *attempted_2m;
+ /* Bitmap of PFNs currently allocated during restore. */
+ unsigned long *allocated_pfns;
+ xen_pfn_t max_allocated_pfn;
+ /* Statistics: number of PFNs allocated so far. */
+ unsigned long alloc_cnt;
} restore;
};
} x86_hvm;
@@ -135,6 +135,7 @@ int populate_pfns(struct xc_sr_context *ctx, unsigned count,
const xen_pfn_t *original_pfns, const uint32_t *types)
{
xc_interface *xch = ctx->xch;
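+ /*
+  * Track the lowest and highest PFN seen in this batch so that allocated
+  * but unpopulated PFNs in that range can be released again below.
+  */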
+ xen_pfn_t min_pfn = original_pfns[0], max_pfn = original_pfns[0];
xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
*pfns = malloc(count * sizeof(*pfns));
unsigned i, nr_pfns = 0;
@@ -149,11 +150,18 @@ int populate_pfns(struct xc_sr_context *ctx, unsigned count,
for ( i = 0; i < count; ++i )
{
+ if ( original_pfns[i] < min_pfn )
+ min_pfn = original_pfns[i];
+ if ( original_pfns[i] > max_pfn )
+ max_pfn = original_pfns[i];
if ( (!types || (types &&
(types[i] != XEN_DOMCTL_PFINFO_XTAB &&
types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
!pfn_is_populated(ctx, original_pfns[i]) )
{
+ rc = ctx->restore.ops.allocate_pfn(ctx, original_pfns[i]);
+ if ( rc )
+ goto err;
rc = pfn_set_populated(ctx, original_pfns[i]);
if ( rc )
goto err;
@@ -161,6 +169,16 @@ int populate_pfns(struct xc_sr_context *ctx, unsigned count,
++nr_pfns;
}
}
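+ /*
+  * allocate_pfn() may have allocated more PFNs than this batch populates,
+  * e.g. when a whole superpage was used. Scan the batch range and hand
+  * any allocated but still unpopulated PFNs back to Xen.
+  */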
+ IPRINTF("checking range %lx %lx\n", min_pfn, max_pfn);
+ while (min_pfn < max_pfn) {
+ if (!pfn_is_populated(ctx, min_pfn) && test_and_clear_bit(min_pfn, ctx->x86_hvm.restore.allocated_pfns)) {
+ xen_pfn_t pfn = min_pfn;
+ rc = xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &pfn);
+ IPRINTF("free %lx %lx %d\n", min_pfn, pfn, rc);
+ }
+ min_pfn++;
+ }
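+ /*
+  * Allocation already happened in allocate_pfn() above, so force the
+  * original batch population below to be skipped.
+  */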
+ nr_pfns = 0;
if ( nr_pfns )
{
@@ -723,6 +741,10 @@ static void cleanup(struct xc_sr_context *ctx)
NRPAGES(bitmap_size(ctx->restore.p2m_size)));
free(ctx->restore.buffered_records);
free(ctx->restore.populated_pfns);
+ if ( ctx->dominfo.hvm )
+ {
+ free(ctx->x86_hvm.restore.sp_extents);
+ free(ctx->x86_hvm.restore.attempted_1g);
+ free(ctx->x86_hvm.restore.attempted_2m);
+ free(ctx->x86_hvm.restore.allocated_pfns);
+ }
if ( ctx->restore.ops.cleanup(ctx) )
PERROR("Failed to clean up");
}
@@ -810,6 +832,17 @@ static int restore(struct xc_sr_context *ctx)
saved_errno = errno;
saved_rc = rc;
PERROR("Restore failed");
+ if ( ctx->dominfo.hvm )
+ {
+ unsigned long i;
+ bool a, p;
+
+ IPRINTF("alloc_cnt %lu\n", ctx->x86_hvm.restore.alloc_cnt);
+ for ( i = 0; i < ctx->restore.p2m_size; i++ )
+ {
+ p = test_bit(i, ctx->restore.populated_pfns);
+ a = test_bit(i, ctx->x86_hvm.restore.allocated_pfns);
+ if ( p != a )
+ IPRINTF("%lx a %d p %d\n", i, a, p);
+ }
+ }
done:
cleanup(ctx);
@@ -888,6 +921,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
}
ctx.restore.p2m_size = nr_pfns;
+ IPRINTF("p2m_size %lx\n", ctx.restore.p2m_size);
if ( ctx.dominfo.hvm )
{
@@ -3,6 +3,10 @@
#include "xc_sr_common_x86.h"
+#define SUPERPAGE_2MB_SHIFT 9
+#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
+#define SUPERPAGE_1GB_SHIFT 18
+#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
/*
* Process an HVM_CONTEXT record from the stream.
*/
@@ -149,6 +153,20 @@ static int x86_hvm_setup(struct xc_sr_context *ctx)
return -1;
}
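+ /*
+  * Allocate the superpage tracking structures: scratch extent space, the
+  * attempted-1GB/2MB bitmaps, and the bitmap of allocated PFNs.
+  */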
+ ctx->x86_hvm.restore.sp_extents =
+     calloc(1UL << SUPERPAGE_1GB_SHIFT, sizeof(*ctx->x86_hvm.restore.sp_extents));
+ ctx->x86_hvm.restore.attempted_1g =
+     bitmap_alloc((ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1);
+ ctx->x86_hvm.restore.attempted_2m =
+     bitmap_alloc((ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1);
+ ctx->x86_hvm.restore.max_allocated_pfn = ctx->restore.p2m_size;
+ ctx->x86_hvm.restore.allocated_pfns =
+     bitmap_alloc(ctx->x86_hvm.restore.max_allocated_pfn + 1);
+ if ( !ctx->x86_hvm.restore.sp_extents ||
+      !ctx->x86_hvm.restore.attempted_1g ||
+      !ctx->x86_hvm.restore.attempted_2m ||
+      !ctx->x86_hvm.restore.allocated_pfns )
+ {
+ ERROR("Unable to allocate memory for superpage tracking structures");
+ return -1;
+ }
+ /*
+  * The VGA hole below 1MB prevents the start of guest memory from being
+  * backed by superpages: rule out a 1GB page over the first 1GB and a 2MB
+  * page over the first 2MB.
+  */
+ set_bit(0, ctx->x86_hvm.restore.attempted_1g);
+ set_bit(0, ctx->x86_hvm.restore.attempted_2m);
+
return 0;
}
@@ -228,8 +246,139 @@ static int x86_hvm_cleanup(struct xc_sr_context *ctx)
return 0;
}
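+/* Check whether a pfn was already allocated by x86_hvm_allocate_pfn(). */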
+static bool pfn_is_allocated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+ if ( pfn > ctx->x86_hvm.restore.max_allocated_pfn )
+ return false;
+ return test_bit(pfn, ctx->x86_hvm.restore.allocated_pfns);
+}
+
+/*
+ * Set a pfn as allocated, expanding the tracking structures if needed. To
+ * avoid realloc()ing excessively, the size is increased to the nearest power
+ * of two large enough to contain the required pfn. For example, a pfn of
+ * 0x12345 grows the bitmap to cover pfns up to 0x1ffff.
+ */
+static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+ xc_interface *xch = ctx->xch;
+
+ if ( pfn > ctx->x86_hvm.restore.max_allocated_pfn )
+ {
+ xen_pfn_t new_max;
+ size_t old_sz, new_sz;
+ unsigned long *p;
+
+ /* Round up to the nearest power of two larger than pfn, less 1. */
+ new_max = pfn;
+ new_max |= new_max >> 1;
+ new_max |= new_max >> 2;
+ new_max |= new_max >> 4;
+ new_max |= new_max >> 8;
+ new_max |= new_max >> 16;
+#ifdef __x86_64__
+ new_max |= new_max >> 32;
+#endif
+
+ old_sz = bitmap_size(ctx->x86_hvm.restore.max_allocated_pfn + 1);
+ new_sz = bitmap_size(new_max + 1);
+ p = realloc(ctx->x86_hvm.restore.allocated_pfns, new_sz);
+ if ( !p )
+ {
+ ERROR("Failed to realloc allocated bitmap");
+ errno = ENOMEM;
+ return -1;
+ }
+
+ memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
+
+ ctx->x86_hvm.restore.allocated_pfns = p;
+ ctx->x86_hvm.restore.max_allocated_pfn = new_max;
+ }
+
+ assert(!test_bit(pfn, ctx->x86_hvm.restore.allocated_pfns));
+ set_bit(pfn, ctx->x86_hvm.restore.allocated_pfns);
+
+ return 0;
+}
+
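+/*
+ * Allocate memory for the given pfn: try a 1GB superpage covering it once
+ * per 1GB region, then a 2MB superpage once per 2MB region, and finally
+ * fall back to a single 4kB page. Every pfn covered by a successful
+ * allocation is recorded in the allocated_pfns bitmap.
+ */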
+static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+ xc_interface *xch = ctx->xch;
+ bool success = false;
+ int rc = -1;
+ long done;
+ unsigned long i, nr_extents;
+ unsigned long stat_1g = 0, stat_2m = 0, stat_4k = 0;
+ unsigned long idx_1g, idx_2m;
+ unsigned long count;
+ xen_pfn_t base_pfn = 0, *sp_extents = ctx->x86_hvm.restore.sp_extents;
+
+ IPRINTF("pfn %lx\n", (long)pfn);
+ if (pfn_is_allocated(ctx, pfn))
+ return 0;
+
+ idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+ idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+ IPRINTF("idx_1g %lu idx_2m %lu\n", idx_1g, idx_2m);
+ if ( !test_and_set_bit(idx_1g, ctx->x86_hvm.restore.attempted_1g) )
+ {
+ count = 1UL << SUPERPAGE_1GB_SHIFT;
+ base_pfn = (pfn >> SUPERPAGE_1GB_SHIFT) << SUPERPAGE_1GB_SHIFT;
+ nr_extents = count >> SUPERPAGE_1GB_SHIFT;
+ IPRINTF("base_pfn %lx count %lu nr_extents %lu\n", (unsigned long)base_pfn, count, nr_extents);
+ for ( i = 0; i < nr_extents; i++ )
+ sp_extents[i] = base_pfn + (i << SUPERPAGE_1GB_SHIFT);
+ done = xc_domain_populate_physmap(xch, ctx->domid, nr_extents,
+                                   SUPERPAGE_1GB_SHIFT, 0, sp_extents);
+ IPRINTF("1G %lu -> %ld\n", nr_extents, done);
+ if ( done > 0 )
+ {
+ success = true;
+ ctx->x86_hvm.restore.alloc_cnt += count;
+ stat_1g = done;
+ for ( i = 0; i < (count >> SUPERPAGE_2MB_SHIFT); i++ )
+ set_bit((base_pfn >> SUPERPAGE_2MB_SHIFT) + i, ctx->x86_hvm.restore.attempted_2m);
+ }
+ }
+
+ if ( !test_and_set_bit(idx_2m, ctx->x86_hvm.restore.attempted_2m) )
+ {
+ count = 1UL << SUPERPAGE_2MB_SHIFT;
+ base_pfn = (pfn >> SUPERPAGE_2MB_SHIFT) << SUPERPAGE_2MB_SHIFT;
+ nr_extents = count >> SUPERPAGE_2MB_SHIFT;
+ IPRINTF("base_pfn %lx count %lu nr_extents %lu\n", (unsigned long)base_pfn, count, nr_extents);
+ for ( i = 0; i < nr_extents; i++ )
+ sp_extents[i] = base_pfn + (i << SUPERPAGE_2MB_SHIFT);
+ done = xc_domain_populate_physmap(xch, ctx->domid, nr_extents,
+                                   SUPERPAGE_2MB_SHIFT, 0, sp_extents);
+ IPRINTF("2M %lu -> %ld\n", nr_extents, done);
+ if ( done > 0 )
+ {
+ success = true;
+ ctx->x86_hvm.restore.alloc_cnt += count;
+ stat_2m = done;
+ }
+ }
+ if ( !success )
+ {
+ count = 1;
+ sp_extents[0] = base_pfn = pfn;
+ done = xc_domain_populate_physmap(xch, ctx->domid, count, 0, 0, sp_extents);
+ if ( done > 0 )
+ {
+ success = true;
+ ctx->x86_hvm.restore.alloc_cnt += count;
+ stat_4k = count;
+ }
+ }
+ IPRINTF("count %lu\n", count);
+ IPRINTF("1G %lu 2M %lu 4k %lu\n", stat_1g, stat_2m, stat_4k);
+ if ( success )
+ {
+ do {
+ count--;
+ rc = pfn_set_allocated(ctx, base_pfn + count);
+ if ( rc )
+ break;
+ } while ( count );
+ }
+ return rc;
+}
+
struct xc_sr_restore_ops restore_ops_x86_hvm =
{
+ .allocate_pfn = x86_hvm_allocate_pfn,
.pfn_is_valid = x86_hvm_pfn_is_valid,
.pfn_to_gfn = x86_hvm_pfn_to_gfn,
.set_gfn = x86_hvm_set_gfn,
@@ -1152,8 +1152,15 @@ static int x86_pv_cleanup(struct xc_sr_context *ctx)
return 0;
}
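+/*
+ * TODO: superpage-aware allocation is not implemented for PV domains;
+ * fail any attempt so the caller bails out.
+ */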
+static int x86_pv_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+ errno = ENOMEM;
+ return -1;
+}
+
struct xc_sr_restore_ops restore_ops_x86_pv =
{
+ .allocate_pfn = x86_pv_allocate_pfn,
.pfn_is_valid = x86_pv_pfn_is_valid,
.pfn_to_gfn = pfn_to_mfn,
.set_page_type = x86_pv_set_page_type,