diff mbox series

[v8,3/5] xen/mem_sharing: VM forking

Message ID 0f7d636910c45e9ca32fda4ef864a9b7d6e32745.1581362050.git.tamas.lengyel@intel.com (mailing list archive)
State Superseded
Headers show
Series VM forking | expand

Commit Message

Tamas K Lengyel Feb. 10, 2020, 7:21 p.m. UTC
VM forking is the process of creating a domain with an empty memory space and a
parent domain specified from which to populate the memory when necessary. For
the new domain to be functional the VM state is copied over as part of the fork
operation (HVM params, hap allocation, etc).

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/domain.c             |  11 ++
 xen/arch/x86/hvm/hvm.c            |   2 +-
 xen/arch/x86/mm/mem_sharing.c     | 221 ++++++++++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c             |  11 +-
 xen/include/asm-x86/mem_sharing.h |  17 +++
 xen/include/public/memory.h       |   5 +
 xen/include/xen/sched.h           |   2 +
 7 files changed, 267 insertions(+), 2 deletions(-)

Comments

Andrew Cooper Feb. 21, 2020, 1:43 p.m. UTC | #1
On 10/02/2020 19:21, Tamas K Lengyel wrote:
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index 3835bc928f..ccf338918d 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -36,6 +37,9 @@
>  #include <asm/altp2m.h>
>  #include <asm/atomic.h>
>  #include <asm/event.h>
> +#include <asm/hap.h>
> +#include <asm/hvm/hvm.h>
> +#include <asm/hvm/save.h>

This include is stale, I think.

> +static void fork_tsc(struct domain *cd, struct domain *d)
> +{
> +    uint32_t tsc_mode;
> +    uint32_t gtsc_khz;
> +    uint32_t incarnation;
> +    uint64_t elapsed_nsec;
> +
> +    tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
> +    tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation);

Sadly, get and set are asymmetric.  For reasons best understood by the
original authors, incarnation gets automatically incremented on set,
rather than happening as part of migration where it logically lives.

As a result, you probably want to set incarnation - 1, and leave a
comment saying "Don't bump the incarnation" or similar.

~Andrew
Andrew Cooper Feb. 21, 2020, 2:02 p.m. UTC | #2
On 21/02/2020 13:43, Andrew Cooper wrote:
> On 10/02/2020 19:21, Tamas K Lengyel wrote:
>> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
>> index 3835bc928f..ccf338918d 100644
>> --- a/xen/arch/x86/mm/mem_sharing.c
>> +++ b/xen/arch/x86/mm/mem_sharing.c
>> @@ -36,6 +37,9 @@
>>  #include <asm/altp2m.h>
>>  #include <asm/atomic.h>
>>  #include <asm/event.h>
>> +#include <asm/hap.h>
>> +#include <asm/hvm/hvm.h>
>> +#include <asm/hvm/save.h>
> This include is stale, I think.
>
>> +static void fork_tsc(struct domain *cd, struct domain *d)
>> +{
>> +    uint32_t tsc_mode;
>> +    uint32_t gtsc_khz;
>> +    uint32_t incarnation;
>> +    uint64_t elapsed_nsec;
>> +
>> +    tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
>> +    tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation);
> Sadly, get and set are asymetric.  For reasons best understood by the
> original authors, incarnation gets automatically incremented on set,
> rather than happing as part of migration where it logically lives.
>
> As a result, you probably want to set incarnation - 1, and leave a
> comment saying "Don't bump the incarnation" or similar.

P.S. Can both be fixed on commit if you agree.  Seems pointless sending
a v9 just for these two.

~Andrew
Tamas K Lengyel Feb. 21, 2020, 2:22 p.m. UTC | #3
On Fri, Feb 21, 2020 at 7:02 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> On 21/02/2020 13:43, Andrew Cooper wrote:
> > On 10/02/2020 19:21, Tamas K Lengyel wrote:
> >> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> >> index 3835bc928f..ccf338918d 100644
> >> --- a/xen/arch/x86/mm/mem_sharing.c
> >> +++ b/xen/arch/x86/mm/mem_sharing.c
> >> @@ -36,6 +37,9 @@
> >>  #include <asm/altp2m.h>
> >>  #include <asm/atomic.h>
> >>  #include <asm/event.h>
> >> +#include <asm/hap.h>
> >> +#include <asm/hvm/hvm.h>
> >> +#include <asm/hvm/save.h>
> > This include is stale, I think.
> >
> >> +static void fork_tsc(struct domain *cd, struct domain *d)
> >> +{
> >> +    uint32_t tsc_mode;
> >> +    uint32_t gtsc_khz;
> >> +    uint32_t incarnation;
> >> +    uint64_t elapsed_nsec;
> >> +
> >> +    tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
> >> +    tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation);
> > Sadly, get and set are asymetric.  For reasons best understood by the
> > original authors, incarnation gets automatically incremented on set,
> > rather than happing as part of migration where it logically lives.
> >
> > As a result, you probably want to set incarnation - 1, and leave a
> > comment saying "Don't bump the incarnation" or similar.
>
> P.S. Can both be fixed on commit if you agree.  Seems pointless sending
> a v9 just for these two.

Great, I have no issue with these changes.

Thanks!
Tamas
Andrew Cooper Feb. 21, 2020, 2:42 p.m. UTC | #4
On 10/02/2020 19:21, Tamas K Lengyel wrote:
> +static int mem_sharing_fork(struct domain *d, struct domain *cd)
> +{
> +    int rc = -EINVAL;
> +
> +    if ( !cd->controller_pause_count )
> +        return rc;
> +
> +    /*
> +     * We only want to get and pause the parent once, not each time this
> +     * operation is restarted due to preemption.
> +     */
> +    if ( !cd->parent_paused )
> +    {
> +        ASSERT(get_domain(d));
> +        domain_pause(d);
> +
> +        cd->parent_paused = true;
> +        cd->max_pages = d->max_pages;
> +        cd->max_vcpus = d->max_vcpus;

Sorry, I spoke too soon.  You can't modify max_vcpus here, because it
violates the invariant that domain_vcpu() depends upon for safety.

If the toolstack gets things wrong, Xen will either leak struct vcpu's
on cd's teardown, or corrupt memory beyond the end of the cd->vcpu[] array.

Looking at the hypercall semantics, userspace creates a new domain
(which specifies max_cpus), then calls mem_sharing_fork(parent_dom,
new_dom);  Forking should be rejected if toolstack hasn't chosen the
same number of vcpus for the new domain.

This raises the question of whether the same should be true for
max_pages as well.

~Andrew
Tamas K Lengyel Feb. 21, 2020, 5:07 p.m. UTC | #5
On Fri, Feb 21, 2020 at 7:42 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> On 10/02/2020 19:21, Tamas K Lengyel wrote:
> > +static int mem_sharing_fork(struct domain *d, struct domain *cd)
> > +{
> > +    int rc = -EINVAL;
> > +
> > +    if ( !cd->controller_pause_count )
> > +        return rc;
> > +
> > +    /*
> > +     * We only want to get and pause the parent once, not each time this
> > +     * operation is restarted due to preemption.
> > +     */
> > +    if ( !cd->parent_paused )
> > +    {
> > +        ASSERT(get_domain(d));
> > +        domain_pause(d);
> > +
> > +        cd->parent_paused = true;
> > +        cd->max_pages = d->max_pages;
> > +        cd->max_vcpus = d->max_vcpus;
>
> Sorry, I spoke too soon.  You can't modify max_vcpus here, because it
> violates the invariant that domain_vcpu() depends upon for safety.
>
> If the toolstack gets things wrong, Xen will either leak struct vcpu's
> on cd's teardown, or corrupt memory beyond the end of the cd->vcpu[] array.
>
> Looking at the hypercall semantics, userspace creates a new domain
> (which specifies max_cpus), then calls mem_sharing_fork(parent_dom,
> new_dom);  Forking should be rejected if toolstack hasn't chosen the
> same number of vcpus for the new domain.

That's unfortunate since this would require an extra hypercall just to
get information Xen already has. I think instead of what you recommend
what I'll do is extend XEN_DOMCTL_createdomain to include the parent
domain's ID already so Xen can gather these information automatically
without the toolstack having to do it this roundabout way.

>
> This raises the question of whether the same should be true for
> max_pages as well.

Could you expand on this?

Thanks,
Tamas
Andrew Cooper Feb. 21, 2020, 5:47 p.m. UTC | #6
On 21/02/2020 17:07, Tamas K Lengyel wrote:
> On Fri, Feb 21, 2020 at 7:42 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 10/02/2020 19:21, Tamas K Lengyel wrote:
>>> +static int mem_sharing_fork(struct domain *d, struct domain *cd)
>>> +{
>>> +    int rc = -EINVAL;
>>> +
>>> +    if ( !cd->controller_pause_count )
>>> +        return rc;
>>> +
>>> +    /*
>>> +     * We only want to get and pause the parent once, not each time this
>>> +     * operation is restarted due to preemption.
>>> +     */
>>> +    if ( !cd->parent_paused )
>>> +    {
>>> +        ASSERT(get_domain(d));
>>> +        domain_pause(d);
>>> +
>>> +        cd->parent_paused = true;
>>> +        cd->max_pages = d->max_pages;
>>> +        cd->max_vcpus = d->max_vcpus;
>> Sorry, I spoke too soon.  You can't modify max_vcpus here, because it
>> violates the invariant that domain_vcpu() depends upon for safety.
>>
>> If the toolstack gets things wrong, Xen will either leak struct vcpu's
>> on cd's teardown, or corrupt memory beyond the end of the cd->vcpu[] array.
>>
>> Looking at the hypercall semantics, userspace creates a new domain
>> (which specifies max_cpus), then calls mem_sharing_fork(parent_dom,
>> new_dom);  Forking should be rejected if toolstack hasn't chosen the
>> same number of vcpus for the new domain.
> That's unfortunate since this would require an extra hypercall just to
> get information Xen already has. I think instead of what you recommend
> what I'll do is extend XEN_DOMCTL_createdomain to include the parent
> domain's ID already so Xen can gather these information automatically
> without the toolstack having to do it this roundabout way.

Conceptually, I think that is cleaner.  You really do want to duplicate
the parent domain, whatever its settings are.

However, I'd suggest not extending createdomain.  What should the
semantics be for such a call which passes conflicting settings?

How about a new top level domctl for clone_domain, taking a parent
domain identifier, and optionally providing a specific new domid?  This
way, it is impossible to provide conflicting settings, and the semantics
should be quite clear.  Internally, Xen can do whatever it needs to copy
appropriate settings, and get things sorted before the domain becomes
globally visible.

One question needing considering is whether such a hypercall could in
theory be useful without the mem_sharing infrastructure, or not.  (i.e.
how tightly integrated we should aim for.)

>> This raises the question of whether the same should be true for
>> max_pages as well.
> Could you expand on this?

Well - these two are a very odd subset of things to blindly copy.  The
max_cpus side is wrong, which makes max_pages likely to be wrong as well.

~Andrew
Tamas K Lengyel Feb. 21, 2020, 5:56 p.m. UTC | #7
On Fri, Feb 21, 2020 at 10:47 AM Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
>
> On 21/02/2020 17:07, Tamas K Lengyel wrote:
> > On Fri, Feb 21, 2020 at 7:42 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> >> On 10/02/2020 19:21, Tamas K Lengyel wrote:
> >>> +static int mem_sharing_fork(struct domain *d, struct domain *cd)
> >>> +{
> >>> +    int rc = -EINVAL;
> >>> +
> >>> +    if ( !cd->controller_pause_count )
> >>> +        return rc;
> >>> +
> >>> +    /*
> >>> +     * We only want to get and pause the parent once, not each time this
> >>> +     * operation is restarted due to preemption.
> >>> +     */
> >>> +    if ( !cd->parent_paused )
> >>> +    {
> >>> +        ASSERT(get_domain(d));
> >>> +        domain_pause(d);
> >>> +
> >>> +        cd->parent_paused = true;
> >>> +        cd->max_pages = d->max_pages;
> >>> +        cd->max_vcpus = d->max_vcpus;
> >> Sorry, I spoke too soon.  You can't modify max_vcpus here, because it
> >> violates the invariant that domain_vcpu() depends upon for safety.
> >>
> >> If the toolstack gets things wrong, Xen will either leak struct vcpu's
> >> on cd's teardown, or corrupt memory beyond the end of the cd->vcpu[] array.
> >>
> >> Looking at the hypercall semantics, userspace creates a new domain
> >> (which specifies max_cpus), then calls mem_sharing_fork(parent_dom,
> >> new_dom);  Forking should be rejected if toolstack hasn't chosen the
> >> same number of vcpus for the new domain.
> > That's unfortunate since this would require an extra hypercall just to
> > get information Xen already has. I think instead of what you recommend
> > what I'll do is extend XEN_DOMCTL_createdomain to include the parent
> > domain's ID already so Xen can gather these information automatically
> > without the toolstack having to do it this roundabout way.
>
> Conceptually, I think that is cleaner.  You really do want to duplicate
> the parent domain, whatever its settings are.
>
> However, I'd suggest not extending createdomain.  What should the
> semantics be for such a call which passes conflicting settings?

I'm not sure what conflicting settings you worry about? I simply add a
field to xen_domctl_createdomain called parent_domain, then in the
domctl handler if its set we copy the max_vcpus value directly from
there:

        parent_dom = op->u.createdomain.parent_domid;
        if ( parent_dom )
        {
            struct domain *pd = rcu_lock_domain_by_id(parent_dom);

            ret = -EINVAL;
            if ( !pd )
                break;

            op->u.createdomain.max_vcpus = pd->max_vcpus;
            rcu_unlock_domain(pd);
        }

>
> How about a new top level domctl for clone_domain, taking a parent
> domain identifier, and optionally providing a specific new domid?  This
> way, it is impossible to provide conflicting settings, and the semantics
> should be quite clear.  Internally, Xen can do whatever it needs to copy
> appropriate settings, and get things sorted before the domain becomes
> globally visible.

I already have a hypercall added for fork_domain and I even tried
doing everything in a single hypercall. The problem I ran into with
that was it required a lot of refactoring within Xen since
domain_create is not preemptible right now, while some other parts of
forking need to be preemptible. So it was just easier to do it with 2
hypercalls. One just creates the domain shell via
XEN_DOMCTL_createdomain and the second actually sets it up as a fork.

>
> One question needing considering is whether such a hypercall could in
> theory be useful without the mem_sharing infrastructure, or not.  (i.e.
> how tightly integrated we should aim for.)
>
> >> This raises the question of whether the same should be true for
> >> max_pages as well.
> > Could you expand on this?
>
> Well - these two are a very odd subset of things to blindly copy.  The
> max_cpus side is wrong, which makes max_pages likely to be wrong as well.

The max_cpus side is clearer why it's wrong since there is an
allocation during domain_create based on that number. I haven't run
into that issue it seems because all the domains I tested with had
only a single vCPU. But max_pages should be safe to copy, since any
page that would get accessed up to max_pages would get forked from the
parent. I haven't run into any issues with that. So I don't really see
what's the problem there.

Tamas
Tamas K Lengyel Feb. 21, 2020, 6:08 p.m. UTC | #8
> One question needing considering is whether such a hypercall could in
> theory be useful without the mem_sharing infrastructure, or not.  (i.e.
> how tightly integrated we should aim for.)

It would be possible to create domain forks without mem_sharing. The
mem_sharing part just adds an extra optimization on top so we don't
end up copying all accessed pages needlessly, we only do that when the
page is written to. In one of the earlier revisions of the series
(~v4) due to a bug we actually ran the system with all pages being
deduplicated and no mem_sharing and it was working just fine. I'm not
sure if that's what you were asking though :)

Tamas
diff mbox series

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f53ae5ff86..a98e2e0479 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2189,6 +2189,17 @@  int domain_relinquish_resources(struct domain *d)
             ret = relinquish_shared_pages(d);
             if ( ret )
                 return ret;
+
+            /*
+             * If the domain is forked, decrement the parent's pause count
+             * and release the domain.
+             */
+            if ( d->parent )
+            {
+                domain_unpause(d->parent);
+                put_domain(d->parent);
+                d->parent = NULL;
+            }
         }
 #endif
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 00a9e70b7c..55520bbd23 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1915,7 +1915,7 @@  int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
     }
 #endif
 
-    /* Spurious fault? PoD and log-dirty also take this path. */
+    /* Spurious fault? PoD, log-dirty and VM forking also take this path. */
     if ( p2m_is_ram(p2mt) )
     {
         rc = 1;
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 3835bc928f..ccf338918d 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -22,6 +22,7 @@ 
 
 #include <xen/types.h>
 #include <xen/domain_page.h>
+#include <xen/event.h>
 #include <xen/spinlock.h>
 #include <xen/rwlock.h>
 #include <xen/mm.h>
@@ -36,6 +37,9 @@ 
 #include <asm/altp2m.h>
 #include <asm/atomic.h>
 #include <asm/event.h>
+#include <asm/hap.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/save.h>
 #include <xsm/xsm.h>
 
 #include "mm-locks.h"
@@ -1444,6 +1448,193 @@  static inline int mem_sharing_control(struct domain *d, bool enable)
     return 0;
 }
 
+/*
+ * Forking a page only gets called when the VM faults due to no entry being
+ * in the EPT for the access. Depending on the type of access we either
+ * populate the physmap with a shared entry for read-only access or
+ * fork the page if its a write access.
+ *
+ * The client p2m is already locked so we only need to lock
+ * the parent's here.
+ */
+int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing)
+{
+    int rc = -ENOENT;
+    shr_handle_t handle;
+    struct domain *parent;
+    struct p2m_domain *p2m;
+    unsigned long gfn_l = gfn_x(gfn);
+    mfn_t mfn, new_mfn;
+    p2m_type_t p2mt;
+    struct page_info *page;
+
+    if ( !mem_sharing_is_fork(d) )
+        return -ENOENT;
+
+    parent = d->parent;
+
+    if ( !unsharing )
+    {
+        /* For read-only accesses we just add a shared entry to the physmap */
+        while ( parent )
+        {
+            if ( !(rc = nominate_page(parent, gfn, 0, &handle)) )
+                break;
+
+            parent = parent->parent;
+        }
+
+        if ( !rc )
+        {
+            /* The client's p2m is already locked */
+            struct p2m_domain *pp2m = p2m_get_hostp2m(parent);
+
+            p2m_lock(pp2m);
+            rc = add_to_physmap(parent, gfn_l, handle, d, gfn_l, false);
+            p2m_unlock(pp2m);
+
+            if ( !rc )
+                return 0;
+        }
+    }
+
+    /*
+     * If it's a write access (ie. unsharing) or if adding a shared entry to
+     * the physmap failed we'll fork the page directly.
+     */
+    p2m = p2m_get_hostp2m(d);
+    parent = d->parent;
+
+    while ( parent )
+    {
+        mfn = get_gfn_query(parent, gfn_l, &p2mt);
+
+        if ( mfn_valid(mfn) && p2m_is_any_ram(p2mt) )
+            break;
+
+        put_gfn(parent, gfn_l);
+        parent = parent->parent;
+    }
+
+    if ( !parent )
+        return -ENOENT;
+
+    if ( !(page = alloc_domheap_page(d, 0)) )
+    {
+        put_gfn(parent, gfn_l);
+        return -ENOMEM;
+    }
+
+    new_mfn = page_to_mfn(page);
+    copy_domain_page(new_mfn, mfn);
+    set_gpfn_from_mfn(mfn_x(new_mfn), gfn_l);
+
+    put_gfn(parent, gfn_l);
+
+    return p2m->set_entry(p2m, gfn, new_mfn, PAGE_ORDER_4K, p2m_ram_rw,
+                          p2m->default_access, -1);
+}
+
+static int bring_up_vcpus(struct domain *cd, struct cpupool *cpupool)
+{
+    int ret;
+    unsigned int i;
+
+    if ( (ret = cpupool_move_domain(cd, cpupool)) )
+        return ret;
+
+    for ( i = 0; i < cd->max_vcpus; i++ )
+    {
+        if ( cd->vcpu[i] )
+            continue;
+
+        if ( !vcpu_create(cd, i) )
+            return -EINVAL;
+    }
+
+    domain_update_node_affinity(cd);
+    return 0;
+}
+
+static int fork_hap_allocation(struct domain *cd, struct domain *d)
+{
+    int rc;
+    bool preempted;
+    unsigned long mb = hap_get_allocation(d);
+
+    if ( mb == hap_get_allocation(cd) )
+        return 0;
+
+    paging_lock(cd);
+    rc = hap_set_allocation(cd, mb << (20 - PAGE_SHIFT), &preempted);
+    paging_unlock(cd);
+
+    if ( rc )
+        return rc;
+
+    if ( preempted )
+        return -ERESTART;
+
+    return 0;
+}
+
+static void fork_tsc(struct domain *cd, struct domain *d)
+{
+    uint32_t tsc_mode;
+    uint32_t gtsc_khz;
+    uint32_t incarnation;
+    uint64_t elapsed_nsec;
+
+    tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
+    tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation);
+}
+
+static int mem_sharing_fork(struct domain *d, struct domain *cd)
+{
+    int rc = -EINVAL;
+
+    if ( !cd->controller_pause_count )
+        return rc;
+
+    /*
+     * We only want to get and pause the parent once, not each time this
+     * operation is restarted due to preemption.
+     */
+    if ( !cd->parent_paused )
+    {
+        ASSERT(get_domain(d));
+        domain_pause(d);
+
+        cd->parent_paused = true;
+        cd->max_pages = d->max_pages;
+        cd->max_vcpus = d->max_vcpus;
+    }
+
+    /* this is preemptible so it's the first to get done */
+    if ( (rc = fork_hap_allocation(cd, d)) )
+        goto done;
+
+    if ( (rc = bring_up_vcpus(cd, d->cpupool)) )
+        goto done;
+
+    if ( (rc = hvm_copy_context_and_params(cd, d)) )
+        goto done;
+
+    fork_tsc(cd, d);
+
+    cd->parent = d;
+
+ done:
+    if ( rc && rc != -ERESTART )
+    {
+        domain_unpause(d);
+        put_domain(d);
+        cd->parent_paused = false;
+    }
+
+    return rc;
+}
+
 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 {
     int rc;
@@ -1698,6 +1889,36 @@  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
         rc = debug_gref(d, mso.u.debug.u.gref);
         break;
 
+    case XENMEM_sharing_op_fork:
+    {
+        struct domain *pd;
+
+        rc = -EINVAL;
+        if ( mso.u.fork._pad[0] || mso.u.fork._pad[1] ||
+             mso.u.fork._pad[2] )
+            goto out;
+
+        rc = rcu_lock_live_remote_domain_by_id(mso.u.fork.parent_domain,
+                                               &pd);
+        if ( rc )
+            goto out;
+
+        if ( !mem_sharing_enabled(pd) )
+        {
+            if ( (rc = mem_sharing_control(pd, true)) )
+                goto out;
+        }
+
+        rc = mem_sharing_fork(pd, d);
+
+        if ( rc == -ERESTART )
+            rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
+                                               "lh", XENMEM_sharing_op,
+                                               arg);
+        rcu_unlock_domain(pd);
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 2c0bb7e869..72b4485970 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -509,6 +509,14 @@  mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
 
     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
 
+    /* Check if we need to fork the page */
+    if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
+         !mem_sharing_fork_page(p2m->domain, gfn, !!(q & P2M_UNSHARE)) )
+    {
+        mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
+    }
+
+    /* Check if we need to unshare the page */
     if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
     {
         ASSERT(p2m_is_hostp2m(p2m));
@@ -587,7 +595,8 @@  struct page_info *p2m_get_page_from_gfn(
             return page;
 
         /* Error path: not a suitable GFN at all */
-        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
+        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) &&
+             !mem_sharing_is_fork(p2m->domain) )
             return NULL;
     }
 
diff --git a/xen/include/asm-x86/mem_sharing.h b/xen/include/asm-x86/mem_sharing.h
index 53760a2896..ac968fae3f 100644
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -39,6 +39,9 @@  struct mem_sharing_domain
 
 #define mem_sharing_enabled(d) ((d)->arch.hvm.mem_sharing.enabled)
 
+#define mem_sharing_is_fork(d) \
+    (mem_sharing_enabled(d) && !!((d)->parent))
+
 /* Auditing of memory sharing code? */
 #ifndef NDEBUG
 #define MEM_SHARING_AUDIT 1
@@ -88,6 +91,9 @@  static inline int mem_sharing_unshare_page(struct domain *d,
     return rc;
 }
 
+int mem_sharing_fork_page(struct domain *d, gfn_t gfn,
+                          bool unsharing);
+
 /*
  * If called by a foreign domain, possible errors are
  *   -EBUSY -> ring full
@@ -117,6 +123,7 @@  int relinquish_shared_pages(struct domain *d);
 #else
 
 #define mem_sharing_enabled(d) false
+#define mem_sharing_is_fork(p2m) false
 
 static inline unsigned int mem_sharing_get_nr_saved_mfns(void)
 {
@@ -141,6 +148,16 @@  static inline int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
     return -EOPNOTSUPP;
 }
 
+static inline int mem_sharing_fork(struct domain *d, struct domain *cd, bool vcpu)
+{
+    return -EOPNOTSUPP;
+}
+
+static inline int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool lock)
+{
+    return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __MEM_SHARING_H__ */
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index cfdda6e2a8..90a3f4498e 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -482,6 +482,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
 #define XENMEM_sharing_op_add_physmap       6
 #define XENMEM_sharing_op_audit             7
 #define XENMEM_sharing_op_range_share       8
+#define XENMEM_sharing_op_fork              9
 
 #define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
 #define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
@@ -532,6 +533,10 @@  struct xen_mem_sharing_op {
                 uint32_t gref;     /* IN: gref to debug         */
             } u;
         } debug;
+        struct mem_sharing_op_fork {
+            domid_t parent_domain;
+            uint16_t _pad[3];                /* Must be set to 0 */
+        } fork;
     } u;
 };
 typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7c5c437247..8ed727e10c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -507,6 +507,8 @@  struct domain
     /* Memory sharing support */
 #ifdef CONFIG_MEM_SHARING
     struct vm_event_domain *vm_event_share;
+    struct domain *parent; /* VM fork parent */
+    bool parent_paused;
 #endif
     /* Memory paging support */
 #ifdef CONFIG_HAS_MEM_PAGING