
dm_op: Add xendevicemodel_modified_memory_bulk.

Message ID 1489675444-34062-1-git-send-email-jennifer.herbert@citrix.com (mailing list archive)
State New, archived

Commit Message

Jennifer Herbert March 16, 2017, 2:44 p.m. UTC
From: Jennifer Herbert <Jennifer.Herbert@citrix.com>

This new devicemodel library call allows multiple extents of pages to be
marked as modified in a single call.  This is needed for a use case I'm
working on.

The Xen side of the modified_memory call has been modified to accept
an array of extents.  The devicemodel library either provides an array
of length 1, to support the original library function, or exposes a
second function that allows the caller to provide an array.

Signed-off-by: Jennifer Herbert <Jennifer.Herbert@citrix.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Paul Durrant <paul.durrant@citrix.com>
---
 tools/libs/devicemodel/core.c                   |   30 ++++--
 tools/libs/devicemodel/include/xendevicemodel.h |   20 +++-
 xen/arch/x86/hvm/dm.c                           |  117 +++++++++++++++--------
 xen/include/public/hvm/dm_op.h                  |   12 ++-
 4 files changed, 125 insertions(+), 54 deletions(-)

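For illustration, a caller would use the new bulk interface roughly as follows. This is a minimal sketch only: the flush_dirty_ranges() wrapper and the pfn values are hypothetical, and it assumes the xen_dm_op_modified_memory_extent type from dm_op.h is visible to library users, as the new prototype implies.

/* Hypothetical caller of the new bulk interface (sketch only). */
#include <xendevicemodel.h>

static int flush_dirty_ranges(xendevicemodel_handle *dmod, domid_t domid)
{
    struct xen_dm_op_modified_memory_extent extents[] = {
        { .nr = 16, .first_pfn = 0x1000 }, /* first modified range */
        { .nr = 4,  .first_pfn = 0x8000 }, /* second, discontiguous range */
    };

    /* A single hypercall marks both discontiguous extents as modified. */
    return xendevicemodel_modified_memory_bulk(dmod, domid, extents, 2);
}
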
Comments

Paul Durrant March 17, 2017, 10:42 a.m. UTC | #1
> -----Original Message-----
> From: Jennifer Herbert [mailto:jennifer.herbert@citrix.com]
> Sent: 16 March 2017 14:44
> To: Xen-devel <xen-devel@lists.xen.org>
> Cc: Jennifer Herbert <jennifer.herbert@citrix.com>; Jan Beulich
> <jbeulich@suse.com>; Ian Jackson <Ian.Jackson@citrix.com>; Wei Liu
> <wei.liu2@citrix.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul
> Durrant <Paul.Durrant@citrix.com>
> Subject: [PATCH] dm_op: Add xendevicemodel_modified_memory_bulk.
> 
> From: Jennifer Herbert <Jennifer.Herbert@citrix.com>
> 
> This new devicemodel library call allows multiple extents of pages to be
> marked as modified in a single call.  This is needed for a use case I'm
> working on.
> 
> The Xen side of the modified_memory call has been modified to accept
> an array of extents.  The devicemodel library either provides an array
> of length 1, to support the original library function, or exposes a
> second function that allows the caller to provide an array.
> 
> Signed-off-by: Jennifer Herbert <Jennifer.Herbert@citrix.com>
> ---
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Ian Jackson <ian.jackson@eu.citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Paul Durrant <paul.durrant@citrix.com>
> ---
>  tools/libs/devicemodel/core.c                   |   30 ++++--
>  tools/libs/devicemodel/include/xendevicemodel.h |   20 +++-
>  xen/arch/x86/hvm/dm.c                           |  117 +++++++++++++++--------
>  xen/include/public/hvm/dm_op.h                  |   12 ++-
>  4 files changed, 125 insertions(+), 54 deletions(-)
> 
> diff --git a/tools/libs/devicemodel/core.c b/tools/libs/devicemodel/core.c
> index a85cb49..1f7a9dc 100644
> --- a/tools/libs/devicemodel/core.c
> +++ b/tools/libs/devicemodel/core.c
> @@ -434,22 +434,36 @@ int xendevicemodel_track_dirty_vram(
>                               dirty_bitmap, (size_t)(nr + 7) / 8);
>  }
> 
> -int xendevicemodel_modified_memory(
> -    xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
> -    uint32_t nr)
> +int xendevicemodel_modified_memory_bulk(
> +    xendevicemodel_handle *dmod, domid_t domid,
> +    struct xen_dm_op_modified_memory_extent *extents, uint32_t nr)
>  {
>      struct xen_dm_op op;
> -    struct xen_dm_op_modified_memory *data;
> +    struct xen_dm_op_modified_memory *header;
> +    size_t extents_size = nr * sizeof(struct xen_dm_op_modified_memory_extent);
> 
>      memset(&op, 0, sizeof(op));
> 
>      op.op = XEN_DMOP_modified_memory;
> -    data = &op.u.modified_memory;
> +    header = &op.u.modified_memory;
> 
> -    data->first_pfn = first_pfn;
> -    data->nr = nr;
> +    header->nr_extents = nr;
> +    header->offset = 0;
> 
> -    return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +    return xendevicemodel_op(dmod, domid, 2, &op, sizeof(op),
> +                             extents, extents_size);
> +}
> +
> +int xendevicemodel_modified_memory(
> +    xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
> +    uint32_t nr)
> +{
> +    struct xen_dm_op_modified_memory_extent extent;
> +
> +    extent.first_pfn = first_pfn;
> +    extent.nr = nr;
> +
> +    return xendevicemodel_modified_memory_bulk(dmod, domid, &extent, 1);
>  }
> 
>  int xendevicemodel_set_mem_type(
> diff --git a/tools/libs/devicemodel/include/xendevicemodel.h b/tools/libs/devicemodel/include/xendevicemodel.h
> index b3f600e..b88d73c 100644
> --- a/tools/libs/devicemodel/include/xendevicemodel.h
> +++ b/tools/libs/devicemodel/include/xendevicemodel.h
> @@ -236,8 +236,8 @@ int xendevicemodel_track_dirty_vram(
>      uint32_t nr, unsigned long *dirty_bitmap);
> 
>  /**
> - * This function notifies the hypervisor that a set of domain pages
> - * have been modified.
> + * This function notifies the hypervisor that a set of contiguous
> + * domain pages have been modified.
>   *
>   * @parm dmod a handle to an open devicemodel interface.
>   * @parm domid the domain id to be serviced
> @@ -249,6 +249,22 @@ int xendevicemodel_modified_memory(
>      xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
>      uint32_t nr);
> 
> +

It looks like this blank line is extraneous.

> +/**
> + * This function notifies the hypervisor that a set of discontiguous
> + * domain pages have been modified.
> + *
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced
> + * @parm extents an array of extent structs, which each hold
> +                 a start_pfn and nr (number of pfns).
> + * @parm nr the number of extents in the array
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_modified_memory_bulk(
> +    xendevicemodel_handle *dmod, domid_t domid,
> +    struct xen_dm_op_modified_memory_extent extents[], uint32_t nr);
> +
>  /**
>   * This function notifies the hypervisor that a set of domain pages
>   * are to be treated in a specific way. (See the definition of
> diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
> index 2122c45..0c4a820 100644
> --- a/xen/arch/x86/hvm/dm.c
> +++ b/xen/arch/x86/hvm/dm.c
> @@ -119,56 +119,96 @@ static int set_isa_irq_level(struct domain *d, uint8_t isa_irq,
>  }
> 
>  static int modified_memory(struct domain *d,
> -                           struct xen_dm_op_modified_memory *data)
> +                           struct xen_dm_op_modified_memory *header,
> +                           xen_dm_op_buf_t* buf)
>  {
> -    xen_pfn_t last_pfn = data->first_pfn + data->nr - 1;
> -    unsigned int iter = 0;
> -    int rc = 0;
> +    /* Process maximum of 255 pfns before checking for continuation */

255 or 256?
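
If the intent is to keep the original cadence of a preemption check every 256 pfns, the constant and its comment would presumably become (sketch):

    /* Process a maximum of 256 pfns before checking for continuation. */
    const unsigned int max_pfns = 256;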

> +    const uint32_t max_pfns = 0xff;
> 
> -    if ( (data->first_pfn > last_pfn) ||
> -         (last_pfn > domain_get_maximum_gpfn(d)) )
> -        return -EINVAL;
> +    xen_pfn_t last_pfn;
> +    int rc = 0;
> +    uint32_t offset = header->offset;
> +    unsigned long pfn;
> +    unsigned long max_pfn;
> +    unsigned max_nr;
> +    uint32_t rem_pfns = max_pfns;
> 
>      if ( !paging_mode_log_dirty(d) )
>          return 0;
> 
> -    while ( iter < data->nr )
> +    if ( (buf->size / sizeof(struct xen_dm_op_modified_memory_extent)) <
> +        header->nr_extents )
> +        return -EINVAL;
> +
> +    while ( offset < header->nr_extents )
>      {
> -        unsigned long pfn = data->first_pfn + iter;
> -        struct page_info *page;
> +        struct xen_dm_op_modified_memory_extent extent;
> 
> -        page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
> -        if ( page )
> +        if ( copy_from_guest_offset(&extent, buf->h, offset, 1) )
> +            return -EFAULT;
> +
> +        last_pfn = extent.first_pfn + extent.nr - 1;
> +
> +        if ( last_pfn > domain_get_maximum_gpfn(d) )
> +            return -EINVAL;
> +
> +        if ( extent.nr > rem_pfns )
> +           max_nr = rem_pfns;
> +        else
>          {
> -            mfn_t gmfn = _mfn(page_to_mfn(page));
> -
> -            paging_mark_dirty(d, gmfn);
> -            /*
> -             * These are most probably not page tables any more
> -             * don't take a long time and don't die either.
> -             */
> -            sh_remove_shadows(d, gmfn, 1, 0);
> -            put_page(page);
> +            max_nr = extent.nr;
> +            offset++;
>          }
> 
> -        iter++;
> +        rem_pfns -= max_nr;
> +        max_pfn = extent.first_pfn + max_nr;
> 
> -        /*
> -         * Check for continuation every 256th iteration and if the
> -         * iteration is not the last.
> -         */
> -        if ( (iter < data->nr) && ((iter & 0xff) == 0) &&
> -             hypercall_preempt_check() )
> +        pfn = extent.first_pfn;
> +        while ( pfn < max_pfn )
>          {
> -            data->first_pfn += iter;
> -            data->nr -= iter;
> +            struct page_info *page;
> +            page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
> +
> +            if ( page )
> +            {
> +                mfn_t gmfn = _mfn(page_to_mfn(page));
> +
> +                paging_mark_dirty(d, gmfn);
> +                /*
> +                 * These are most probably not page tables any more
> +                 * don't take a long time and don't die either.
> +                 */
> +                sh_remove_shadows(d, gmfn, 1, 0);
> +                put_page(page);
> +            }
> +            pfn++;
> +        }
> 
> -            rc = -ERESTART;
> -            break;
> +        if ( max_nr < extent.nr )
> +        {
> +            extent.first_pfn += max_nr;
> +            extent.nr-= max_nr;

Missing space before -=.

> +            if ( copy_to_guest_offset(buf->h, offset, &extent, 1) )
> +                return -EFAULT;
>          }
> -    }
> 
> -    return rc;
> +        /*
> +         * Check for continuation every 256th pfn and if the
> +         * pfn is not the last.
> +         */
> +
> +        if ( (rem_pfns == 0) && (offset <= header->nr_extents) )
> +        {
> +            if ( hypercall_preempt_check() )
> +            {
> +                header->offset = offset;
> +                rc = -ERESTART;
> +                break;
> +            }
> +            rem_pfns += max_pfns;
> +        }
> +   }
> +   return rc;
>  }
> 
>  static bool allow_p2m_type_change(p2m_type_t old, p2m_type_t new)
> @@ -441,13 +481,8 @@ static int dm_op(domid_t domid,
>          struct xen_dm_op_modified_memory *data =
>              &op.u.modified_memory;
> 
> -        const_op = false;
> -
> -        rc = -EINVAL;
> -        if ( data->pad )
> -            break;
> -
> -        rc = modified_memory(d, data);
> +        rc = modified_memory(d, data, &bufs[1]);
> +        const_op = (rc != -ERESTART);
>          break;
>      }
> 
> diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
> index f54cece..1c602b1 100644
> --- a/xen/include/public/hvm/dm_op.h
> +++ b/xen/include/public/hvm/dm_op.h
> @@ -237,13 +237,19 @@ struct xen_dm_op_set_pci_link_route {
>   * XEN_DMOP_modified_memory: Notify that a set of pages were modified by
>   *                           an emulator.
>   *
> - * NOTE: In the event of a continuation, the @first_pfn is set to the
> - *       value of the pfn of the remaining set of pages and @nr reduced
> - *       to the size of the remaining set.

This comment should really be transferred into a comment block for xen_dm_op_modified_memory_extent below since the extents are modified in the event of a continuation.
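
For instance, the relocated note might read as follows (a sketch, assuming the remainder of the extent struct keeps the existing nr/pad/first_pfn layout):

/*
 * NOTE: In the event of a continuation, @first_pfn of a partially
 *       processed extent is set to the first pfn of the remaining
 *       pages and @nr is reduced to the size of the remaining set.
 */
struct xen_dm_op_modified_memory_extent {
    /* IN - number of contiguous pages modified */
    uint32_t nr;
    uint32_t pad;
    /* IN - first pfn modified */
    uint64_t first_pfn;
};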

> + * DMOP buf 1 contains an array of xen_dm_op_modified_memory_extent with
> + * nr_extent entries.
> + *
> + * On continuation, @offset is set to the next extent to be processed.
>   */
>  #define XEN_DMOP_modified_memory 11
> 
>  struct xen_dm_op_modified_memory {
> +    uint32_t nr_extents; /* IN - number of extents. */
> +    uint32_t offset;     /* Caller must set to 0. */

Again, I think it would be good to explain that this is used for continuations (to indicate the next extent to process), which is why it must be set to 0.
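
A possible wording (sketch only, not the committed comment):

struct xen_dm_op_modified_memory {
    /* IN - number of extents in DMOP buf 1. */
    uint32_t nr_extents;
    /*
     * IN/OUT - Must be set to 0 by the caller.  Used by Xen for
     * continuations: on preemption it is updated to the index of the
     * next extent to be processed, so a re-issued op resumes there.
     */
    uint32_t offset;
};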

  Paul

> +};
> +
> +struct xen_dm_op_modified_memory_extent {
>      /* IN - number of contiguous pages modified */
>      uint32_t nr;
>      uint32_t pad;
> --
> 1.7.10.4
Jan Beulich March 20, 2017, 10:18 a.m. UTC | #2
>>> On 16.03.17 at 15:44, <jennifer.herbert@citrix.com> wrote:
> --- a/xen/arch/x86/hvm/dm.c
> +++ b/xen/arch/x86/hvm/dm.c
> @@ -119,56 +119,96 @@ static int set_isa_irq_level(struct domain *d, uint8_t isa_irq,
>  }
>  
>  static int modified_memory(struct domain *d,
> -                           struct xen_dm_op_modified_memory *data)
> +                           struct xen_dm_op_modified_memory *header,
> +                           xen_dm_op_buf_t* buf)
>  {
> -    xen_pfn_t last_pfn = data->first_pfn + data->nr - 1;
> -    unsigned int iter = 0;
> -    int rc = 0;
> +    /* Process maximum of 255 pfns before checking for continuation */
> +    const uint32_t max_pfns = 0xff;
>  
> -    if ( (data->first_pfn > last_pfn) ||
> -         (last_pfn > domain_get_maximum_gpfn(d)) )
> -        return -EINVAL;
> +    xen_pfn_t last_pfn;
> +    int rc = 0;
> +    uint32_t offset = header->offset;
> +    unsigned long pfn;
> +    unsigned long max_pfn;
> +    unsigned max_nr;
> +    uint32_t rem_pfns = max_pfns;

Please avoid fixed width types when you don't really need them. Also
"unsigned int" please instead of plain "unsigned".

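That is, the locals in question might become something like (sketch):

    xen_pfn_t last_pfn;
    int rc = 0;
    unsigned int offset = header->offset;
    unsigned long pfn, max_pfn;
    unsigned int max_nr, rem_pfns = max_pfns;
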
>      if ( !paging_mode_log_dirty(d) )
>          return 0;
>  
> -    while ( iter < data->nr )
> +    if ( (buf->size / sizeof(struct xen_dm_op_modified_memory_extent)) <
> +        header->nr_extents )

Indentation.

> +        return -EINVAL;
> +
> +    while ( offset < header->nr_extents )
>      {
> -        unsigned long pfn = data->first_pfn + iter;
> -        struct page_info *page;
> +        struct xen_dm_op_modified_memory_extent extent;
>  
> -        page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
> -        if ( page )
> +        if ( copy_from_guest_offset(&extent, buf->h, offset, 1) )
> +            return -EFAULT;
> +
> +        last_pfn = extent.first_pfn + extent.nr - 1;
> +
> +        if ( last_pfn > domain_get_maximum_gpfn(d) )

Where did the original "(data->first_pfn > last_pfn)" go?
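
Presumably the per-extent equivalent, which would also reject a zero-length or wrapping extent, is (sketch):

        last_pfn = extent.first_pfn + extent.nr - 1;

        if ( (extent.first_pfn > last_pfn) ||
             (last_pfn > domain_get_maximum_gpfn(d)) )
            return -EINVAL;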

> +            return -EINVAL;
> +
> +        if ( extent.nr > rem_pfns )
> +           max_nr = rem_pfns;
> +        else
>          {
> -            mfn_t gmfn = _mfn(page_to_mfn(page));
> -
> -            paging_mark_dirty(d, gmfn);
> -            /*
> -             * These are most probably not page tables any more
> -             * don't take a long time and don't die either.
> -             */
> -            sh_remove_shadows(d, gmfn, 1, 0);
> -            put_page(page);
> +            max_nr = extent.nr;
> +            offset++;
>          }
>  
> -        iter++;
> +        rem_pfns -= max_nr;
> +        max_pfn = extent.first_pfn + max_nr;
>  
> -        /*
> -         * Check for continuation every 256th iteration and if the
> -         * iteration is not the last.
> -         */
> -        if ( (iter < data->nr) && ((iter & 0xff) == 0) &&
> -             hypercall_preempt_check() )
> +        pfn = extent.first_pfn;
> +        while ( pfn < max_pfn )

The max_ prefixes chosen are somewhat problematic with a loop
condition like this: "max" commonly means the maximum valid one
rather than the first following item. Perhaps "end_pfn" and just
"nr"?

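In other words, something like this, with nr and end_pfn declared alongside the other locals (a sketch; the loop body stays as in the patch):

        if ( extent.nr > rem_pfns )
            nr = rem_pfns;
        else
        {
            nr = extent.nr;
            offset++;
        }

        rem_pfns -= nr;
        end_pfn = extent.first_pfn + nr;

        for ( pfn = extent.first_pfn; pfn < end_pfn; pfn++ )
        {
            /* existing get_page_from_gfn()/paging_mark_dirty() body */
        }
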
>          {
> -            data->first_pfn += iter;
> -            data->nr -= iter;
> +            struct page_info *page;
> +            page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
> +
> +            if ( page )
> +            {
> +                mfn_t gmfn = _mfn(page_to_mfn(page));
> +
> +                paging_mark_dirty(d, gmfn);
> +                /*
> +                 * These are most probably not page tables any more
> +                 * don't take a long time and don't die either.
> +                 */
> +                sh_remove_shadows(d, gmfn, 1, 0);
> +                put_page(page);
> +            }
> +            pfn++;
> +        }
>  
> -            rc = -ERESTART;
> -            break;
> +        if ( max_nr < extent.nr )
> +        {
> +            extent.first_pfn += max_nr;
> +            extent.nr-= max_nr;
> +            if ( copy_to_guest_offset(buf->h, offset, &extent, 1) )
> +                return -EFAULT;

Is this copying back needed when not returning -ERESTART below?
I do realize that with the current code structure it's needed for the
copy_from above to read the value back, but even though this doesn't
look to be a double-fetch vulnerability, I think it would be better if
the value propagation occurred without going through guest memory.
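
One way to do that (a sketch only; process_pfns() is a hypothetical stand-in for the existing get_page_from_gfn()/paging_mark_dirty() loop body) is to consume each extent from the local copy and only write it back to the guest when actually preempting:

    while ( offset < header->nr_extents )
    {
        struct xen_dm_op_modified_memory_extent extent;

        if ( copy_from_guest_offset(&extent, buf->h, offset, 1) )
            return -EFAULT;

        for ( ; ; )
        {
            unsigned int nr = min(extent.nr, rem_pfns);

            process_pfns(d, extent.first_pfn, nr); /* hypothetical helper */
            extent.first_pfn += nr;
            extent.nr -= nr;
            rem_pfns -= nr;

            if ( !extent.nr )
                break;                  /* extent fully processed */

            /* Out of budget mid-extent: preempt, or refill the budget. */
            if ( hypercall_preempt_check() )
            {
                if ( copy_to_guest_offset(buf->h, offset, &extent, 1) )
                    return -EFAULT;
                header->offset = offset;
                return -ERESTART;
            }
            rem_pfns = max_pfns;
        }

        offset++;

        if ( !rem_pfns )
        {
            if ( offset < header->nr_extents && hypercall_preempt_check() )
            {
                header->offset = offset;
                return -ERESTART;
            }
            rem_pfns = max_pfns;
        }
    }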

>          }
> -    }
>  
> -    return rc;
> +        /*
> +         * Check for continuation every 256th pfn and if the
> +         * pfn is not the last.
> +         */
> +
> +        if ( (rem_pfns == 0) && (offset <= header->nr_extents) )

DYM < instead of <= here?

> +        {
> +            if ( hypercall_preempt_check() )
> +            {
> +                header->offset = offset;
> +                rc = -ERESTART;
> +                break;
> +            }
> +            rem_pfns += max_pfns;

rem_pfns is zero prior to this anyway: Please use = instead of += .

> --- a/xen/include/public/hvm/dm_op.h
> +++ b/xen/include/public/hvm/dm_op.h
> @@ -237,13 +237,19 @@ struct xen_dm_op_set_pci_link_route {
>   * XEN_DMOP_modified_memory: Notify that a set of pages were modified by
>   *                           an emulator.
>   *
> - * NOTE: In the event of a continuation, the @first_pfn is set to the
> - *       value of the pfn of the remaining set of pages and @nr reduced
> - *       to the size of the remaining set.
> + * DMOP buf 1 contains an array of xen_dm_op_modified_memory_extent with
> + * nr_extent entries.
> + *
> + * On continuation, @offset is set to the next extent to be processed.
>   */
>  #define XEN_DMOP_modified_memory 11
>  
>  struct xen_dm_op_modified_memory {
> +    uint32_t nr_extents; /* IN - number of extents. */
> +    uint32_t offset;     /* Caller must set to 0. */

I think you should try to get away without this extra field: for the
purpose here, incrementing the handle value together with decrementing
nr_extents should be sufficient (just as is done with various other
batchable hypercalls). And if the field were to stay, "offset" is a bad
name imo, as it leaves open whether it is byte- or extent-granular.
"cur_extent" perhaps?

Jan

Patch

diff --git a/tools/libs/devicemodel/core.c b/tools/libs/devicemodel/core.c
index a85cb49..1f7a9dc 100644
--- a/tools/libs/devicemodel/core.c
+++ b/tools/libs/devicemodel/core.c
@@ -434,22 +434,36 @@  int xendevicemodel_track_dirty_vram(
                              dirty_bitmap, (size_t)(nr + 7) / 8);
 }
 
-int xendevicemodel_modified_memory(
-    xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
-    uint32_t nr)
+int xendevicemodel_modified_memory_bulk(
+    xendevicemodel_handle *dmod, domid_t domid,
+    struct xen_dm_op_modified_memory_extent *extents, uint32_t nr)
 {
     struct xen_dm_op op;
-    struct xen_dm_op_modified_memory *data;
+    struct xen_dm_op_modified_memory *header;
+    size_t extents_size = nr * sizeof(struct xen_dm_op_modified_memory_extent);
 
     memset(&op, 0, sizeof(op));
 
     op.op = XEN_DMOP_modified_memory;
-    data = &op.u.modified_memory;
+    header = &op.u.modified_memory;
 
-    data->first_pfn = first_pfn;
-    data->nr = nr;
+    header->nr_extents = nr;
+    header->offset = 0;
 
-    return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
+    return xendevicemodel_op(dmod, domid, 2, &op, sizeof(op),
+                             extents, extents_size);
+}
+
+int xendevicemodel_modified_memory(
+    xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
+    uint32_t nr)
+{
+    struct xen_dm_op_modified_memory_extent extent;
+
+    extent.first_pfn = first_pfn;
+    extent.nr = nr;
+
+    return xendevicemodel_modified_memory_bulk(dmod, domid, &extent, 1);
 }
 
 int xendevicemodel_set_mem_type(
diff --git a/tools/libs/devicemodel/include/xendevicemodel.h b/tools/libs/devicemodel/include/xendevicemodel.h
index b3f600e..b88d73c 100644
--- a/tools/libs/devicemodel/include/xendevicemodel.h
+++ b/tools/libs/devicemodel/include/xendevicemodel.h
@@ -236,8 +236,8 @@  int xendevicemodel_track_dirty_vram(
     uint32_t nr, unsigned long *dirty_bitmap);
 
 /**
- * This function notifies the hypervisor that a set of domain pages
- * have been modified.
+ * This function notifies the hypervisor that a set of contiguous
+ * domain pages have been modified.
  *
  * @parm dmod a handle to an open devicemodel interface.
  * @parm domid the domain id to be serviced
@@ -249,6 +249,22 @@  int xendevicemodel_modified_memory(
     xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
     uint32_t nr);
 
+
+/**
+ * This function notifies the hypervisor that a set of discontiguous
+ * domain pages have been modified.
+ *
+ * @parm dmod a handle to an open devicemodel interface.
+ * @parm domid the domain id to be serviced
+ * @parm extents an array of extent structs, which each hold
+                 a start_pfn and nr (number of pfns).
+ * @parm nr the number of extents in the array
+ * @return 0 on success, -1 on failure.
+ */
+int xendevicemodel_modified_memory_bulk(
+    xendevicemodel_handle *dmod, domid_t domid,
+    struct xen_dm_op_modified_memory_extent extents[], uint32_t nr);
+
 /**
  * This function notifies the hypervisor that a set of domain pages
  * are to be treated in a specific way. (See the definition of
diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
index 2122c45..0c4a820 100644
--- a/xen/arch/x86/hvm/dm.c
+++ b/xen/arch/x86/hvm/dm.c
@@ -119,56 +119,96 @@  static int set_isa_irq_level(struct domain *d, uint8_t isa_irq,
 }
 
 static int modified_memory(struct domain *d,
-                           struct xen_dm_op_modified_memory *data)
+                           struct xen_dm_op_modified_memory *header,
+                           xen_dm_op_buf_t* buf)
 {
-    xen_pfn_t last_pfn = data->first_pfn + data->nr - 1;
-    unsigned int iter = 0;
-    int rc = 0;
+    /* Process maximum of 255 pfns before checking for continuation */
+    const uint32_t max_pfns = 0xff;
 
-    if ( (data->first_pfn > last_pfn) ||
-         (last_pfn > domain_get_maximum_gpfn(d)) )
-        return -EINVAL;
+    xen_pfn_t last_pfn;
+    int rc = 0;
+    uint32_t offset = header->offset;
+    unsigned long pfn;
+    unsigned long max_pfn;
+    unsigned max_nr;
+    uint32_t rem_pfns = max_pfns;
 
     if ( !paging_mode_log_dirty(d) )
         return 0;
 
-    while ( iter < data->nr )
+    if ( (buf->size / sizeof(struct xen_dm_op_modified_memory_extent)) <
+        header->nr_extents )
+        return -EINVAL;
+
+    while ( offset < header->nr_extents )
     {
-        unsigned long pfn = data->first_pfn + iter;
-        struct page_info *page;
+        struct xen_dm_op_modified_memory_extent extent;
 
-        page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
-        if ( page )
+        if ( copy_from_guest_offset(&extent, buf->h, offset, 1) )
+            return -EFAULT;
+
+        last_pfn = extent.first_pfn + extent.nr - 1;
+
+        if ( last_pfn > domain_get_maximum_gpfn(d) )
+            return -EINVAL;
+
+        if ( extent.nr > rem_pfns )
+           max_nr = rem_pfns;
+        else
         {
-            mfn_t gmfn = _mfn(page_to_mfn(page));
-
-            paging_mark_dirty(d, gmfn);
-            /*
-             * These are most probably not page tables any more
-             * don't take a long time and don't die either.
-             */
-            sh_remove_shadows(d, gmfn, 1, 0);
-            put_page(page);
+            max_nr = extent.nr;
+            offset++;
         }
 
-        iter++;
+        rem_pfns -= max_nr;
+        max_pfn = extent.first_pfn + max_nr;
 
-        /*
-         * Check for continuation every 256th iteration and if the
-         * iteration is not the last.
-         */
-        if ( (iter < data->nr) && ((iter & 0xff) == 0) &&
-             hypercall_preempt_check() )
+        pfn = extent.first_pfn;
+        while ( pfn < max_pfn )
         {
-            data->first_pfn += iter;
-            data->nr -= iter;
+            struct page_info *page;
+            page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
+
+            if ( page )
+            {
+                mfn_t gmfn = _mfn(page_to_mfn(page));
+
+                paging_mark_dirty(d, gmfn);
+                /*
+                 * These are most probably not page tables any more
+                 * don't take a long time and don't die either.
+                 */
+                sh_remove_shadows(d, gmfn, 1, 0);
+                put_page(page);
+            }
+            pfn++;
+        }
 
-            rc = -ERESTART;
-            break;
+        if ( max_nr < extent.nr )
+        {
+            extent.first_pfn += max_nr;
+            extent.nr-= max_nr;
+            if ( copy_to_guest_offset(buf->h, offset, &extent, 1) )
+                return -EFAULT;
         }
-    }
 
-    return rc;
+        /*
+         * Check for continuation every 256th pfn and if the
+         * pfn is not the last.
+         */
+
+        if ( (rem_pfns == 0) && (offset <= header->nr_extents) )
+        {
+            if ( hypercall_preempt_check() )
+            {
+                header->offset = offset;
+                rc = -ERESTART;
+                break;
+            }
+            rem_pfns += max_pfns;
+        }
+   }
+   return rc;
 }
 
 static bool allow_p2m_type_change(p2m_type_t old, p2m_type_t new)
@@ -441,13 +481,8 @@  static int dm_op(domid_t domid,
         struct xen_dm_op_modified_memory *data =
             &op.u.modified_memory;
 
-        const_op = false;
-
-        rc = -EINVAL;
-        if ( data->pad )
-            break;
-
-        rc = modified_memory(d, data);
+        rc = modified_memory(d, data, &bufs[1]);
+        const_op = (rc != -ERESTART);
         break;
     }
 
diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
index f54cece..1c602b1 100644
--- a/xen/include/public/hvm/dm_op.h
+++ b/xen/include/public/hvm/dm_op.h
@@ -237,13 +237,19 @@  struct xen_dm_op_set_pci_link_route {
  * XEN_DMOP_modified_memory: Notify that a set of pages were modified by
  *                           an emulator.
  *
- * NOTE: In the event of a continuation, the @first_pfn is set to the
- *       value of the pfn of the remaining set of pages and @nr reduced
- *       to the size of the remaining set.
+ * DMOP buf 1 contains an array of xen_dm_op_modified_memory_extent with
+ * nr_extent entries.
+ *
+ * On continuation, @offset is set to the next extent to be processed.
  */
 #define XEN_DMOP_modified_memory 11
 
 struct xen_dm_op_modified_memory {
+    uint32_t nr_extents; /* IN - number of extents. */
+    uint32_t offset;     /* Caller must set to 0. */
+};
+
+struct xen_dm_op_modified_memory_extent {
     /* IN - number of contiguous pages modified */
     uint32_t nr;
     uint32_t pad;