
[v10,2/3] x86/mem_sharing: reset a fork

Message ID 628c5cdc73c589e45a19cc0ddb5cf972b00eb3dd.1582658216.git.tamas.lengyel@intel.com (mailing list archive)
State Superseded
Series VM forking

Commit Message

Tamas K Lengyel Feb. 25, 2020, 7:17 p.m. UTC
Implement a hypercall that allows a fork to shed all memory that got allocated
for it during its execution and to reload its vCPU context from the parent VM.
This allows the forked VM to be reset into the same state the parent VM is in
faster than creating a new fork would be. Measurements show about a 2x speedup
during normal fuzzing operations. Performance may vary depending on how much
memory got allocated for the forked VM. If it has been completely deduplicated
from the parent VM, then creating a new fork would likely be more performant.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v10: implemented hypercall continuation similar to the existing range_share op
---
 xen/arch/x86/mm/mem_sharing.c | 126 +++++++++++++++++++++++++++++++++-
 xen/include/public/memory.h   |   4 ++
 2 files changed, 129 insertions(+), 1 deletion(-)
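
For illustration, a hedged sketch of how a toolstack might drive this memop.
The real wrapper belongs to the tools side of this series and is not part of
this patch, so xc_memshr_memop() below stands in as a hypothetical helper
issuing XENMEM_sharing_op against a domain:

    #include <xenctrl.h>
    #include <xen/memory.h>

    /*
     * Sketch only: reset a fork back to its parent's state. The
     * xc_memshr_memop() helper is hypothetical; the real libxc wrapper is
     * added by the tools patch of this series.
     */
    static int reset_fork(xc_interface *xch, uint32_t fork_domid)
    {
        xen_mem_sharing_op_t mso = {
            .op = XENMEM_sharing_op_fork_reset,
            .domain = fork_domid,
            /* Must start at 0; Xen stores its progress here on preemption. */
            .u.fork_reset.opaque = 0,
        };

        /*
         * If the page-list walk gets preempted, Xen re-issues the hypercall
         * itself via hypercall_create_continuation(), so a single call here
         * runs the whole reset to completion.
         */
        return xc_memshr_memop(xch, &mso);
    }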

Comments

Roger Pau Monné Feb. 26, 2020, 3:25 p.m. UTC | #1
On Tue, Feb 25, 2020 at 11:17:56AM -0800, Tamas K Lengyel wrote:
> Implement a hypercall that allows a fork to shed all memory that got allocated
> for it during its execution and to reload its vCPU context from the parent VM.
> This allows the forked VM to be reset into the same state the parent VM is in
> faster than creating a new fork would be. Measurements show about a 2x speedup
> during normal fuzzing operations. Performance may vary depending on how much
> memory got allocated for the forked VM. If it has been completely deduplicated
> from the parent VM, then creating a new fork would likely be more performant.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
> v10: implemented hypercall continuation similar to the existing range_share op
> ---
>  xen/arch/x86/mm/mem_sharing.c | 126 +++++++++++++++++++++++++++++++++-
>  xen/include/public/memory.h   |   4 ++
>  2 files changed, 129 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index 8ee37e6943..aa4358aae4 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1673,7 +1673,6 @@ static int fork(struct domain *d, struct domain *cd)
>          domain_pause(d);
>          cd->parent_paused = true;
>          cd->max_pages = d->max_pages;
> -        cd->max_vcpus = d->max_vcpus;
>      }
>  
>      /* this is preemptible so it's the first to get done */
> @@ -1704,6 +1703,91 @@ static int fork(struct domain *d, struct domain *cd)
>      return rc;
>  }
>  
> +/*
> + * The fork reset operation is intended to be used on short-lived forks only.
> + */
> +static int fork_reset(struct domain *d, struct domain *cd,
> +                      struct mem_sharing_op_fork_reset *fr)
> +{
> +    int rc = 0;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(cd);
> +    struct page_info *page, *tmp;
> +    unsigned long list_position = 0, preempt_count = 0, start = fr->opaque;
> +
> +    domain_pause(cd);
> +
> +    page_list_for_each_safe(page, tmp, &cd->page_list)
> +    {
> +        p2m_type_t p2mt;
> +        p2m_access_t p2ma;
> +        gfn_t gfn;
> +        mfn_t mfn;
> +        bool shared = false;
> +
> +        list_position++;
> +
> +        /* Resume where we left off before preemption */
> +        if ( start && list_position < start )
> +            continue;
> +
> +        mfn = page_to_mfn(page);
> +        if ( mfn_valid(mfn) )
> +        {
> +
> +            gfn = mfn_to_gfn(cd, mfn);
> +            mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &p2mt, &p2ma,
> +                                        0, NULL, false);
> +
> +            if ( p2m_is_ram(p2mt) && !p2m_is_shared(p2mt) )
> +            {
> +                /* take an extra reference, must work for a shared page */
> +                if ( !get_page(page, cd) )
> +                {
> +                    ASSERT_UNREACHABLE();
> +                    return -EINVAL;
> +                }
> +
> +                shared = true;
> +                preempt_count += 0x10;
> +
> +                /*
> +                 * Must succeed, it's a shared page that exists and
> +                 * thus its size is guaranteed to be 4k so we are not splitting
> +                 * large pages.
> +                 */
> +                rc = p2m->set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
> +                                    p2m_invalid, p2m_access_rwx, -1);
> +                ASSERT(!rc);
> +
> +                put_page_alloc_ref(page);
> +                put_page(page);
> +            }
> +        }
> +
> +        if ( !shared )
> +            preempt_count++;
> +
> +        /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */
> +        if ( preempt_count >= 0x2000 )
> +        {
> +            if ( hypercall_preempt_check() )
> +            {
> +                rc = -ERESTART;
> +                break;
> +            }
> +            preempt_count = 0;
> +        }
> +    }
> +
> +    if ( rc )
> +        fr->opaque = list_position;
> +    else if ( !(rc = hvm_copy_context_and_params(cd, d)) )
> +        fork_tsc(cd, d);

You also need to reset the contents of the special pages, the
vcpu_info pages and the shared_info page in order to match the state
the VM was in when forking.

PV timers should also be reset to parent's state AFAICT, or else you
will get spurious timer interrupts.

In fact you should check against the state of the parent, because the
fork might have changed the position of the shared info or any other
of those magic areas, and that should be reverted to the state they
are in the parent.

Thanks, Roger.
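
A minimal sketch of the shared_info part of such a reset, assuming Xen's
copy_domain_page() and the per-domain shared_info mapping:

    /*
     * Sketch only: revert the fork's shared_info contents to the parent's.
     * Assumes both pages are allocated; the vcpu_info and HVM special pages
     * would need the same treatment.
     */
    static void copy_shared_info(struct domain *cd, struct domain *d)
    {
        copy_domain_page(_mfn(virt_to_mfn(cd->shared_info)),
                         _mfn(virt_to_mfn(d->shared_info)));
    }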
Tamas K Lengyel Feb. 26, 2020, 3:28 p.m. UTC | #2
> You also need to reset the contents of the special pages, the
> vcpu_info pages and the shared_info page in order to match the state
> the VM was in when forking.

Ack.

>
> PV timers should also be reset to parent's state AFAICT, or else you
> will get spurious timer interrupts.

Could you point me to the right direction here for where the timers
are located in the codebase?

> In fact you should check against the state of the parent, because the
> fork might have changed the position of the shared info or any other
> of those magic areas, and that should be reverted to the state they
> are in the parent.

Makes sense.

Thanks,
Tamas
Roger Pau Monné Feb. 26, 2020, 3:38 p.m. UTC | #3
On Wed, Feb 26, 2020 at 08:28:31AM -0700, Tamas K Lengyel wrote:
> > You also need to reset the contents of the special pages, the
> > vcpu_info pages and the shared_info page in order to match the state
> > the VM was in when forking.
> 
> Ack.
> 
> >
> > PV timers should also be reset to parent's state AFAICT, or else you
> > will get spurious timer interrupts.
> 
> Could you point me to the right direction here for where the timers
> are located in the codebase?

The code paths start at VCPUOP_set_periodic_timer,
VCPUOP_stop_periodic_timer, VCPUOP_set_singleshot_timer and
VCPUOP_stop_singleshot_timer. AFAICT it's mostly a matter of copying
the state and starting the timer if it was already active on the
parent.

Thanks, Roger.
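
Going by that pointer, a minimal sketch of copying the periodic timer state
from a parent vCPU to the fork's, assuming the periodic_period and
periodic_timer fields of struct vcpu from xen/sched.h:

    /*
     * Sketch only: mirror the parent vCPU's periodic timer onto the fork's,
     * roughly what VCPUOP_set_periodic_timer sets up. Called with the fork
     * paused.
     */
    static void copy_periodic_timer(struct vcpu *cd_vcpu, struct vcpu *d_vcpu)
    {
        cd_vcpu->periodic_period = d_vcpu->periodic_period;
        cd_vcpu->periodic_last_event = d_vcpu->periodic_last_event;

        /* Only arm the fork's timer if the parent's was running. */
        if ( d_vcpu->periodic_period )
            set_timer(&cd_vcpu->periodic_timer,
                      NOW() + cd_vcpu->periodic_period);
    }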

Patch

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 8ee37e6943..aa4358aae4 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1673,7 +1673,6 @@  static int fork(struct domain *d, struct domain *cd)
         domain_pause(d);
         cd->parent_paused = true;
         cd->max_pages = d->max_pages;
-        cd->max_vcpus = d->max_vcpus;
     }
 
     /* this is preemptible so it's the first to get done */
@@ -1704,6 +1703,91 @@  static int fork(struct domain *d, struct domain *cd)
     return rc;
 }
 
+/*
+ * The fork reset operation is intended to be used on short-lived forks only.
+ */
+static int fork_reset(struct domain *d, struct domain *cd,
+                      struct mem_sharing_op_fork_reset *fr)
+{
+    int rc = 0;
+    struct p2m_domain *p2m = p2m_get_hostp2m(cd);
+    struct page_info *page, *tmp;
+    unsigned long list_position = 0, preempt_count = 0, start = fr->opaque;
+
+    domain_pause(cd);
+
+    page_list_for_each_safe(page, tmp, &cd->page_list)
+    {
+        p2m_type_t p2mt;
+        p2m_access_t p2ma;
+        gfn_t gfn;
+        mfn_t mfn;
+        bool shared = false;
+
+        list_position++;
+
+        /* Resume where we left off before preemption */
+        if ( start && list_position < start )
+            continue;
+
+        mfn = page_to_mfn(page);
+        if ( mfn_valid(mfn) )
+        {
+
+            gfn = mfn_to_gfn(cd, mfn);
+            mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &p2mt, &p2ma,
+                                        0, NULL, false);
+
+            if ( p2m_is_ram(p2mt) && !p2m_is_shared(p2mt) )
+            {
+                /* take an extra reference, must work for a shared page */
+                if ( !get_page(page, cd) )
+                {
+                    ASSERT_UNREACHABLE();
+                    return -EINVAL;
+                }
+
+                shared = true;
+                preempt_count += 0x10;
+
+                /*
+                 * Must succeed, it's a shared page that exists and
+                 * thus its size is guaranteed to be 4k so we are not splitting
+                 * large pages.
+                 */
+                rc = p2m->set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
+                                    p2m_invalid, p2m_access_rwx, -1);
+                ASSERT(!rc);
+
+                put_page_alloc_ref(page);
+                put_page(page);
+            }
+        }
+
+        if ( !shared )
+            preempt_count++;
+
+        /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */
+        if ( preempt_count >= 0x2000 )
+        {
+            if ( hypercall_preempt_check() )
+            {
+                rc = -ERESTART;
+                break;
+            }
+            preempt_count = 0;
+        }
+    }
+
+    if ( rc )
+        fr->opaque = list_position;
+    else if ( !(rc = hvm_copy_context_and_params(cd, d)) )
+        fork_tsc(cd, d);
+
+    domain_unpause(cd);
+    return rc;
+}
+
 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 {
     int rc;
@@ -1973,7 +2057,17 @@  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
             goto out;
 
         if ( !mem_sharing_enabled(pd) && (rc = mem_sharing_control(pd, true)) )
+        {
+            rcu_unlock_domain(pd);
             goto out;
+        }
+
+        rc = -EINVAL;
+        if ( pd->max_vcpus != d->max_vcpus )
+        {
+            rcu_unlock_domain(pd);
+            goto out;
+        }
 
         rc = fork(pd, d);
 
@@ -1985,6 +2079,36 @@  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
         break;
     }
 
+    case XENMEM_sharing_op_fork_reset:
+    {
+        struct domain *pd;
+
+        rc = -ENOSYS;
+        if ( !mem_sharing_is_fork(d) )
+            goto out;
+
+        rc = rcu_lock_live_remote_domain_by_id(d->parent->domain_id, &pd);
+        if ( rc )
+            goto out;
+
+        rc = fork_reset(pd, d, &mso.u.fork_reset);
+
+        rcu_unlock_domain(pd);
+
+        if ( rc > 0 )
+        {
+            if ( __copy_to_guest(arg, &mso, 1) )
+                rc = -EFAULT;
+            else
+                rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
+                                                   "lh", XENMEM_sharing_op,
+                                                   arg);
+        }
+        else
+            mso.u.fork_reset.opaque = 0;
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index c1dbad060e..7ca07c01dd 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -483,6 +483,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
 #define XENMEM_sharing_op_audit             7
 #define XENMEM_sharing_op_range_share       8
 #define XENMEM_sharing_op_fork              9
+#define XENMEM_sharing_op_fork_reset        10
 
 #define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
 #define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
@@ -537,6 +538,9 @@  struct xen_mem_sharing_op {
             domid_t parent_domain;        /* IN: parent's domain id */
             uint16_t _pad[3];             /* Must be set to 0 */
         } fork;
+        struct mem_sharing_op_fork_reset {   /* OP_FORK_RESET */
+            uint64_aligned_t opaque;         /* Must be set to 0 */
+        } fork_reset;
     } u;
 };
 typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;