From patchwork Tue Jan 12 10:08:26 2016
Message-Id: <5694DEAA02000078000C5D7A@prv-mh.provo.novell.com>
Date: Tue, 12 Jan 2016 03:08:26 -0700
From: "Jan Beulich"
To: "xen-devel"
Cc: Andrew Cooper, Keir Fraser
Subject: [Xen-devel] [PATCH] x86/PV: fix unintended dependency of m2p-strict mode on migration-v2

This went unnoticed until a backport of this to an older Xen got used,
causing migration of guests enabling this VM assist to fail, because
page table pinning there precedes vCPU context loading, and hence L4
tables get initialized for the wrong mode.

Fix this by post-processing L4 tables when setting the intended VM
assist flags for the guest.

Note that this leaves in place a dependency on vCPU 0 getting its guest
context restored first, but afaict the logic here is not the only thing
depending on that.

Signed-off-by: Jan Beulich
---
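For context, m2p-strict mode is something a 64-bit PV guest opts in to
at run time via the vm_assist hypercall. Below is a minimal,
illustrative sketch of that opt-in (not the hypervisor-side change this
patch makes); it assumes the usual HYPERVISOR_vm_assist wrapper and the
VMASST_* constants from the public headers, and enable_m2p_strict() is
a made-up helper name:

    /*
     * Illustrative sketch: a 64-bit PV guest enabling m2p-strict mode.
     * Assumes the common HYPERVISOR_vm_assist hypercall wrapper and the
     * public VMASST_* definitions; enable_m2p_strict() is hypothetical.
     */
    #include <xen/interface/xen.h>   /* VMASST_CMD_enable, VMASST_TYPE_m2p_strict */
    #include <asm/xen/hypercall.h>   /* HYPERVISOR_vm_assist() */

    static int enable_m2p_strict(void)
    {
        /* Ask the hypervisor to keep the r/o M2P range out of this
         * guest's user-mode L4 page tables; returns 0 on success. */
        return HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_m2p_strict);
    }

On restore the assist flag only becomes known when vCPU 0's context is
loaded, which is why the change below re-walks already allocated L4
tables at that point.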
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1067,8 +1067,48 @@ int arch_set_info_guest(
         goto out;
 
     if ( v->vcpu_id == 0 )
+    {
         d->vm_assist = c(vm_assist);
 
+        /*
+         * In the restore case we need to deal with L4 pages which got
+         * initialized with m2p_strict still clear (and which hence lack the
+         * correct initial RO_MPT_VIRT_{START,END} L4 entry).
+         */
+        if ( d != current->domain && VM_ASSIST(d, m2p_strict) &&
+             is_pv_domain(d) && !is_pv_32bit_domain(d) &&
+             atomic_read(&d->arch.pv_domain.nr_l4_pages) )
+        {
+            bool_t done = 0;
+
+            spin_lock_recursive(&d->page_alloc_lock);
+
+            for ( i = 0; ; )
+            {
+                struct page_info *page = page_list_remove_head(&d->page_list);
+
+                if ( page_lock(page) )
+                {
+                    if ( (page->u.inuse.type_info & PGT_type_mask) ==
+                         PGT_l4_page_table )
+                        done = !fill_ro_mpt(page_to_mfn(page));
+
+                    page_unlock(page);
+                }
+
+                page_list_add_tail(page, &d->page_list);
+
+                if ( done || (!(++i & 0xff) && hypercall_preempt_check()) )
+                    break;
+            }
+
+            spin_unlock_recursive(&d->page_alloc_lock);
+
+            if ( !done )
+                return -ERESTART;
+        }
+    }
+
     rc = put_old_guest_table(current);
     if ( rc )
         return rc;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1463,13 +1463,20 @@ void init_guest_l4_table(l4_pgentry_t l4
         l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
 }
 
-void fill_ro_mpt(unsigned long mfn)
+bool_t fill_ro_mpt(unsigned long mfn)
 {
     l4_pgentry_t *l4tab = map_domain_page(_mfn(mfn));
+    bool_t ret = 0;
 
-    l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
-        idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+    if ( !l4e_get_intpte(l4tab[l4_table_offset(RO_MPT_VIRT_START)]) )
+    {
+        l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+            idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+        ret = 1;
+    }
     unmap_domain_page(l4tab);
+
+    return ret;
 }
 
 void zap_ro_mpt(unsigned long mfn)
@@ -1527,10 +1534,15 @@ static int alloc_l4_table(struct page_in
             adjust_guest_l4e(pl4e[i], d);
     }
 
-    init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
+    if ( rc >= 0 )
+    {
+        init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
+        atomic_inc(&d->arch.pv_domain.nr_l4_pages);
+        rc = 0;
+    }
     unmap_domain_page(pl4e);
 
-    return rc > 0 ? 0 : rc;
+    return rc;
 }
 
 static void free_l1_table(struct page_info *page)
@@ -1648,7 +1660,13 @@ static int free_l4_table(struct page_inf
 
     unmap_domain_page(pl4e);
 
-    return rc > 0 ? 0 : rc;
+    if ( rc >= 0 )
+    {
+        atomic_dec(&d->arch.pv_domain.nr_l4_pages);
+        rc = 0;
+    }
+
+    return rc;
 }
 
 int page_lock(struct page_info *page)
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -248,6 +248,8 @@ struct pv_domain
 {
     l1_pgentry_t **gdt_ldt_l1tab;
 
+    atomic_t nr_l4_pages;
+
     /* map_domain_page() mapping cache. */
     struct mapcache_domain mapcache;
 };
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -322,7 +322,7 @@ int free_page_type(struct page_info *pag
 void init_guest_l4_table(l4_pgentry_t[], const struct domain *,
                          bool_t zap_ro_mpt);
-void fill_ro_mpt(unsigned long mfn);
+bool_t fill_ro_mpt(unsigned long mfn);
 void zap_ro_mpt(unsigned long mfn);
 
 int is_iomem_page(unsigned long mfn);