diff mbox

[v6,3/3] tools/libxc: use superpages during restore of HVM guest

Message ID 20170826103332.24570-4-olaf@aepfle.de (mailing list archive)
State New, archived
Headers show

Commit Message

Olaf Hering Aug. 26, 2017, 10:33 a.m. UTC
During creating of a HVM domU meminit_hvm() tries to map superpages.
After save/restore or migration this mapping is lost, everything is
allocated in single pages. This causes a performance degradition after
migration.

Add neccessary code to preallocate a superpage for the chunk of pfns
that is received. In case a pfn was not populated on the sending side it
must be freed on the receiving side to avoid over-allocation.

The existing code for x86_pv is moved unmodified into its own file.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
 tools/libxc/xc_sr_common.h          |  25 +++-
 tools/libxc/xc_sr_restore.c         |  75 +---------
 tools/libxc/xc_sr_restore_x86_hvm.c | 288 ++++++++++++++++++++++++++++++++++++
 tools/libxc/xc_sr_restore_x86_pv.c  |  72 ++++++++-
 4 files changed, 382 insertions(+), 78 deletions(-)

Comments

Wei Liu Aug. 30, 2017, 1:48 p.m. UTC | #1
On Sat, Aug 26, 2017 at 12:33:32PM +0200, Olaf Hering wrote:
[...]
> +static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
> +                                 const xen_pfn_t *original_pfns,
> +                                 const uint32_t *types)
> +{
> +    xc_interface *xch = ctx->xch;
> +    xen_pfn_t pfn, min_pfn = original_pfns[0], max_pfn = original_pfns[0];
> +    unsigned i, freed = 0, order;
> +    int rc = -1;
> +
> +    for ( i = 0; i < count; ++i )
> +    {
> +        if ( original_pfns[i] < min_pfn )
> +            min_pfn = original_pfns[i];
> +        if ( original_pfns[i] > max_pfn )
> +            max_pfn = original_pfns[i];
> +    }
> +    DPRINTF("batch of %u pfns between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
> +            count, min_pfn, max_pfn);
> +
> +    for ( i = 0; i < count; ++i )
> +    {
> +        if ( (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
> +              types[i] != XEN_DOMCTL_PFINFO_BROKEN) &&
> +             !pfn_is_populated(ctx, original_pfns[i]) )
> +        {
> +            rc = x86_hvm_allocate_pfn(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +            rc = pfn_set_populated(ctx, original_pfns[i]);
> +            if ( rc )
> +                goto err;
> +        }
> +    }
>
As far as I can tell the algorithm in the patch can't handle:

1. First pfn in a batch points to start of second 1G address space
2. Second pfn in a batch points to a page in the middle of first 1G
3. Guest can only use 1G ram

This is a valid scenario in post-copy migration algorithm.

Please correct me if I'm wrong.

> +
> +    /*
> +     * Scan the entire superpage because several batches will fit into
> +     * a superpage, and it is unknown which pfn triggered the allocation.
> +     */
> +    order = SUPERPAGE_1GB_SHIFT;
> +    pfn = min_pfn = (min_pfn >> order) << order;
> +
> +    while ( pfn <= max_pfn )
> +    {
> +        struct xc_sr_bitmap *bm;
> +        bm = &ctx->x86_hvm.restore.allocated_pfns;
> +        if ( !xc_sr_bitmap_resize(bm, pfn) )
> +        {
> +            PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, pfn);
> +            goto err;
> +        }
> +        if ( !pfn_is_populated(ctx, pfn) &&
> +            xc_sr_test_and_clear_bit(pfn, bm) ) {
> +            xen_pfn_t p = pfn;
> +            rc = xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &p);
> +            if ( rc )
> +            {
> +                PERROR("Failed to release pfn %" PRI_xen_pfn, pfn);
> +                goto err;
> +            }
> +            ctx->restore.tot_pages--;
> +            freed++;
> +        }
> +        pfn++;
> +    }
> +    if ( freed )
> +        DPRINTF("freed %u between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
> +                freed, min_pfn, max_pfn);
> +
> +    rc = 0;
> +
> + err:
> +    return rc;
> +}
> +
Olaf Hering Aug. 30, 2017, 2:27 p.m. UTC | #2
On Wed, Aug 30, Wei Liu wrote:

> As far as I can tell the algorithm in the patch can't handle:
> 
> 1. First pfn in a batch points to start of second 1G address space
> 2. Second pfn in a batch points to a page in the middle of first 1G
> 3. Guest can only use 1G ram

In which way does it not handle it? Over-allocation is supposed to be
handled by the "ctx->restore.tot_pages + sp->count >
ctx->restore.max_pages" checks. Do you mean the second 1G is allocated,
then max_pages is reached, and allocation in other areas is not possible
anymore?
Can this actually happen with the available senders? If not, this is
again the missing memory map.

Olaf
Wei Liu Aug. 30, 2017, 3:14 p.m. UTC | #3
On Wed, Aug 30, 2017 at 04:27:19PM +0200, Olaf Hering wrote:
> On Wed, Aug 30, Wei Liu wrote:
> 
> > As far as I can tell the algorithm in the patch can't handle:
> > 
> > 1. First pfn in a batch points to start of second 1G address space
> > 2. Second pfn in a batch points to a page in the middle of first 1G
> > 3. Guest can only use 1G ram
> 
> In which way does it not handle it? Over-allocation is supposed to be
> handled by the "ctx->restore.tot_pages + sp->count >
> ctx->restore.max_pages" checks. Do you mean the second 1G is allocated,
> then max_pages is reached, and allocation in other areas is not possible
> anymore?

Yes.

> Can this actually happen with the available senders? If not, this is
> again the missing memory map.
> 

Probably not now, but as said, you shouldn't rely on the structure of
the stream unless it is stated in the spec.

And it can definitely happen with post-copy algorithm.
Olaf Hering Aug. 30, 2017, 3:53 p.m. UTC | #4
On Wed, Aug 30, Wei Liu wrote:

> > Can this actually happen with the available senders? If not, this is
> > again the missing memory map.
> Probably not now, but as said, you shouldn't rely on the structure of
> the stream unless it is stated in the spec.

Well, what can happen with todays implementation on the sender side is
the case of a ballooned guest with enough holes within a batch. These
will trigger 1G allocations before the releasing of memory happens. To
solve this, the releasing of memory has to happen more often, probably
after crossing each 2M boundary.


Olaf
diff mbox

Patch

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index da2691ba79..0fa0fbea4d 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -139,6 +139,16 @@  struct xc_sr_restore_ops
      */
     int (*setup)(struct xc_sr_context *ctx);
 
+    /**
+     * Populate PFNs
+     *
+     * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+     * unpopulated subset.
+     */
+    int (*populate_pfns)(struct xc_sr_context *ctx, unsigned count,
+                         const xen_pfn_t *original_pfns, const uint32_t *types);
+
+
     /**
      * Process an individual record from the stream.  The caller shall take
      * care of processing common records (e.g. END, PAGE_DATA).
@@ -224,6 +234,8 @@  struct xc_sr_context
 
             int send_back_fd;
             unsigned long p2m_size;
+            unsigned long max_pages;
+            unsigned long tot_pages;
             xc_hypercall_buffer_t dirty_bitmap_hbuf;
 
             /* From Image Header. */
@@ -336,6 +348,11 @@  struct xc_sr_context
                     /* HVM context blob. */
                     void *context;
                     size_t contextsz;
+
+                    /* Bitmap of currently allocated PFNs during restore. */
+                    struct xc_sr_bitmap attempted_1g;
+                    struct xc_sr_bitmap attempted_2m;
+                    struct xc_sr_bitmap allocated_pfns;
                 } restore;
             };
         } x86_hvm;
@@ -455,14 +472,6 @@  static inline int write_record(struct xc_sr_context *ctx,
  */
 int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
 
-/*
- * This would ideally be private in restore.c, but is needed by
- * x86_pv_localise_page() if we receive pagetables frames ahead of the
- * contents of the frames they point at.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types);
-
 #endif
 /*
  * Local variables:
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index d53948e1a6..8cd9289d1a 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -68,74 +68,6 @@  static int read_headers(struct xc_sr_context *ctx)
     return 0;
 }
 
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset.  If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
-        *pfns = malloc(count * sizeof(*pfns));
-    unsigned i, nr_pfns = 0;
-    int rc = -1;
-
-    if ( !mfns || !pfns )
-    {
-        ERROR("Failed to allocate %zu bytes for populating the physmap",
-              2 * count * sizeof(*mfns));
-        goto err;
-    }
-
-    for ( i = 0; i < count; ++i )
-    {
-        if ( (!types || (types &&
-                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
-                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
-             !pfn_is_populated(ctx, original_pfns[i]) )
-        {
-            rc = pfn_set_populated(ctx, original_pfns[i]);
-            if ( rc )
-                goto err;
-            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
-            ++nr_pfns;
-        }
-    }
-
-    if ( nr_pfns )
-    {
-        rc = xc_domain_populate_physmap_exact(
-            xch, ctx->domid, nr_pfns, 0, 0, mfns);
-        if ( rc )
-        {
-            PERROR("Failed to populate physmap");
-            goto err;
-        }
-
-        for ( i = 0; i < nr_pfns; ++i )
-        {
-            if ( mfns[i] == INVALID_MFN )
-            {
-                ERROR("Populate physmap failed for pfn %u", i);
-                rc = -1;
-                goto err;
-            }
-
-            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
-        }
-    }
-
-    rc = 0;
-
- err:
-    free(pfns);
-    free(mfns);
-
-    return rc;
-}
-
 /*
  * Given a list of pfns, their types, and a block of page data from the
  * stream, populate and record their types, map the relevant subset and copy
@@ -161,7 +93,7 @@  static int process_page_data(struct xc_sr_context *ctx, unsigned count,
         goto err;
     }
 
-    rc = populate_pfns(ctx, count, pfns, types);
+    rc = ctx->restore.ops.populate_pfns(ctx, count, pfns, types);
     if ( rc )
     {
         ERROR("Failed to populate pfns for batch of %u pages", count);
@@ -826,7 +758,12 @@  int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
         return -1;
     }
 
+    /* See xc_domain_getinfo */
+    ctx.restore.max_pages = ctx.dominfo.max_memkb >> (PAGE_SHIFT-10);
+    ctx.restore.tot_pages = ctx.dominfo.nr_pages;
     ctx.restore.p2m_size = nr_pfns;
+    DPRINTF("dom %u p2m_size %lx max_pages %lx",
+            ctx.domid, ctx.restore.p2m_size, ctx.restore.max_pages);
 
     if ( ctx.dominfo.hvm )
     {
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 1dca85354a..2b0eca0c7c 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -135,6 +135,8 @@  static int x86_hvm_localise_page(struct xc_sr_context *ctx,
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned long bits;
 
     if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
     {
@@ -149,7 +151,30 @@  static int x86_hvm_setup(struct xc_sr_context *ctx)
         return -1;
     }
 
+    bm = &ctx->x86_hvm.restore.attempted_1g;
+    bits = (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+    bits = (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    bm = &ctx->x86_hvm.restore.allocated_pfns;
+    bits = ctx->restore.p2m_size + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    /* No superpage in 1st 2MB due to VGA hole */
+    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g);
+    xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m);
+
     return 0;
+
+out:
+    ERROR("Unable to allocate memory for pfn bitmaps");
+    return -1;
 }
 
 /*
@@ -224,10 +249,272 @@  static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
 static int x86_hvm_cleanup(struct xc_sr_context *ctx)
 {
     free(ctx->x86_hvm.restore.context);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns);
 
     return 0;
 }
 
+/*
+ * Set a pfn as allocated, expanding the tracking structures if needed.
+ */
+static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+    {
+        ERROR("Failed to realloc allocated_pfns bitmap");
+        errno = ENOMEM;
+        return -1;
+    }
+    return 0;
+}
+
+struct x86_hvm_sp {
+    xen_pfn_t pfn;
+    xen_pfn_t base_pfn;
+    unsigned long index;
+    unsigned long count;
+};
+
+/*
+ * Try to allocate a 1GB page for this pfn, but avoid Over-allocation.
+ * If this succeeds, mark the range of 2MB pages as busy.
+ */
+static bool x86_hvm_alloc_1g(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned int order, shift;
+    int i, done;
+    xen_pfn_t extent;
+
+    bm = &ctx->x86_hvm.restore.attempted_1g;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( xc_sr_test_and_set_bit(sp->index, bm) )
+        return false;
+
+    order = SUPERPAGE_1GB_SHIFT;
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 ) {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("1G base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+
+    /* Mark all 2MB pages as done to avoid overlapping allocation */
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+    shift = SUPERPAGE_1GB_SHIFT - SUPERPAGE_2MB_SHIFT;
+    for ( i = 0; i < (sp->count >> shift); i++ )
+        xc_sr_set_bit((sp->base_pfn >> SUPERPAGE_2MB_SHIFT) + i, bm);
+
+    return true;
+}
+
+/* Allocate a 2MB page if x86_hvm_alloc_1g failed, avoid Over-allocation. */
+static bool x86_hvm_alloc_2m(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned int order;
+    int done;
+    xen_pfn_t extent;
+
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( xc_sr_test_and_set_bit(sp->index, bm) )
+        return false;
+
+    order = SUPERPAGE_2MB_SHIFT;
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 ) {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("2M base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+
+/* Allocate a single page if x86_hvm_alloc_2m failed. */
+static bool x86_hvm_alloc_4k(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int order;
+    int done;
+    xen_pfn_t extent;
+
+    order = 0;
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another page */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 ) {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("4K base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+/*
+ * Attempt to allocate a superpage where the pfn resides.
+ */
+static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    bool success;
+    int rc = -1;
+    unsigned long idx_1g, idx_2m;
+    struct x86_hvm_sp sp = {
+        .pfn = pfn
+    };
+
+    if ( xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+        return 0;
+
+    idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+    idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+    if ( !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g) )
+    {
+        PERROR("Failed to realloc attempted_1g");
+        return -1;
+    }
+    if ( !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m) )
+    {
+        PERROR("Failed to realloc attempted_2m");
+        return -1;
+    }
+
+    sp.index = idx_1g;
+    success = x86_hvm_alloc_1g(ctx, &sp);
+
+    if ( success == false ) {
+        sp.index = idx_2m;
+        success = x86_hvm_alloc_2m(ctx, &sp);
+    }
+
+    if ( success == false ) {
+        sp.index = 0;
+        success = x86_hvm_alloc_4k(ctx, &sp);
+    }
+
+    if ( success == true ) {
+        do {
+            sp.count--;
+            ctx->restore.tot_pages++;
+            rc = pfn_set_allocated(ctx, sp.base_pfn + sp.count);
+            if ( rc )
+                break;
+        } while ( sp.count );
+    }
+    return rc;
+}
+
+static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                 const xen_pfn_t *original_pfns,
+                                 const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn, min_pfn = original_pfns[0], max_pfn = original_pfns[0];
+    unsigned i, freed = 0, order;
+    int rc = -1;
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( original_pfns[i] < min_pfn )
+            min_pfn = original_pfns[i];
+        if ( original_pfns[i] > max_pfn )
+            max_pfn = original_pfns[i];
+    }
+    DPRINTF("batch of %u pfns between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+            count, min_pfn, max_pfn);
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+              types[i] != XEN_DOMCTL_PFINFO_BROKEN) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = x86_hvm_allocate_pfn(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+        }
+    }
+
+    /*
+     * Scan the entire superpage because several batches will fit into
+     * a superpage, and it is unknown which pfn triggered the allocation.
+     */
+    order = SUPERPAGE_1GB_SHIFT;
+    pfn = min_pfn = (min_pfn >> order) << order;
+
+    while ( pfn <= max_pfn )
+    {
+        struct xc_sr_bitmap *bm;
+        bm = &ctx->x86_hvm.restore.allocated_pfns;
+        if ( !xc_sr_bitmap_resize(bm, pfn) )
+        {
+            PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, pfn);
+            goto err;
+        }
+        if ( !pfn_is_populated(ctx, pfn) &&
+            xc_sr_test_and_clear_bit(pfn, bm) ) {
+            xen_pfn_t p = pfn;
+            rc = xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &p);
+            if ( rc )
+            {
+                PERROR("Failed to release pfn %" PRI_xen_pfn, pfn);
+                goto err;
+            }
+            ctx->restore.tot_pages--;
+            freed++;
+        }
+        pfn++;
+    }
+    if ( freed )
+        DPRINTF("freed %u between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+                freed, min_pfn, max_pfn);
+
+    rc = 0;
+
+ err:
+    return rc;
+}
+
+
 struct xc_sr_restore_ops restore_ops_x86_hvm =
 {
     .pfn_is_valid    = x86_hvm_pfn_is_valid,
@@ -236,6 +523,7 @@  struct xc_sr_restore_ops restore_ops_x86_hvm =
     .set_page_type   = x86_hvm_set_page_type,
     .localise_page   = x86_hvm_localise_page,
     .setup           = x86_hvm_setup,
+    .populate_pfns   = x86_hvm_populate_pfns,
     .process_record  = x86_hvm_process_record,
     .stream_complete = x86_hvm_stream_complete,
     .cleanup         = x86_hvm_cleanup,
diff --git a/tools/libxc/xc_sr_restore_x86_pv.c b/tools/libxc/xc_sr_restore_x86_pv.c
index 50e25c162c..87957559bc 100644
--- a/tools/libxc/xc_sr_restore_x86_pv.c
+++ b/tools/libxc/xc_sr_restore_x86_pv.c
@@ -936,6 +936,75 @@  static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
         ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
 }
 
+/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.  If types is NULL, no page type checking is performed
+ * and all unpopulated pfns are populated.
+ */
+static int x86_pv_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                const xen_pfn_t *original_pfns,
+                                const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
+        *pfns = malloc(count * sizeof(*pfns));
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (!types || (types &&
+                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(
+            xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( mfns[i] == INVALID_MFN )
+            {
+                ERROR("Populate physmap failed for pfn %u", i);
+                rc = -1;
+                goto err;
+            }
+
+            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
 /*
  * restore_ops function.  Convert pfns back to mfns in pagetables.  Possibly
  * needs to populate new frames if a PTE is found referring to a frame which
@@ -980,7 +1049,7 @@  static int x86_pv_localise_page(struct xc_sr_context *ctx,
         }
     }
 
-    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
+    if ( to_populate && x86_pv_populate_pfns(ctx, to_populate, pfns, NULL) )
         return -1;
 
     for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
@@ -1160,6 +1229,7 @@  struct xc_sr_restore_ops restore_ops_x86_pv =
     .set_gfn         = x86_pv_set_gfn,
     .localise_page   = x86_pv_localise_page,
     .setup           = x86_pv_setup,
+    .populate_pfns   = x86_pv_populate_pfns,
     .process_record  = x86_pv_process_record,
     .stream_complete = x86_pv_stream_complete,
     .cleanup         = x86_pv_cleanup,