Message ID | fb927228a8f68ce983ae0b46e6665b5b8dd0764e.1647970630.git.tamas.lengyel@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] x86/mem_sharing: option to skip populating special pages during fork | expand |
On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > Add option to the fork memop to skip populating the fork with special pages. > These special pages are only necessary when setting up forks to be fully > functional with a toolstack. For short-lived forks where no toolstack is active > these pages are uneccesary. I'm not sure those are strictly related to having a toolstack. For example the vcpu_info has nothing to do with having a toolstack, and is only used by the guest in order to receive events or fetch time related data. So while a short-lived fork might not make use of those, that has nothing to do with having a toolstack or not. > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> > --- > xen/arch/x86/include/asm/hvm/domain.h | 4 +++- > xen/arch/x86/mm/mem_sharing.c | 33 +++++++++++++++++---------- > xen/include/public/memory.h | 4 ++-- > 3 files changed, 26 insertions(+), 15 deletions(-) > > diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h > index 698455444e..446cd06411 100644 > --- a/xen/arch/x86/include/asm/hvm/domain.h > +++ b/xen/arch/x86/include/asm/hvm/domain.h > @@ -31,7 +31,9 @@ > #ifdef CONFIG_MEM_SHARING > struct mem_sharing_domain > { > - bool enabled, block_interrupts; > + bool enabled; > + bool block_interrupts; > + bool skip_special_pages; > > /* > * When releasing shared gfn's in a preemptible manner, recall where > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > index 15e6a7ed81..84c04ddfa3 100644 > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c > @@ -1643,7 +1643,8 @@ static int bring_up_vcpus(struct domain *cd, struct domain *d) > return 0; > } > > -static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > +static int copy_vcpu_settings(struct domain *cd, const struct domain *d, > + bool skip_special_pages) > { > unsigned int i; > struct p2m_domain *p2m = p2m_get_hostp2m(cd); > @@ -1660,7 +1661,7 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > /* Copy & map in the vcpu_info page if the guest uses one */ > vcpu_info_mfn = d_vcpu->vcpu_info_mfn; > - if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > + if ( !skip_special_pages && !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > { > mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn; > > @@ -1807,17 +1808,18 @@ static int copy_special_pages(struct domain *cd, struct domain *d) > return 0; > } > > -static int copy_settings(struct domain *cd, struct domain *d) > +static int copy_settings(struct domain *cd, struct domain *d, > + bool skip_special_pages) > { > int rc; > > - if ( (rc = copy_vcpu_settings(cd, d)) ) > + if ( (rc = copy_vcpu_settings(cd, d, skip_special_pages)) ) > return rc; > > if ( (rc = hvm_copy_context_and_params(cd, d)) ) > return rc; > > - if ( (rc = copy_special_pages(cd, d)) ) > + if ( !skip_special_pages && (rc = copy_special_pages(cd, d)) ) > return rc; > > copy_tsc(cd, d); > @@ -1826,9 +1828,11 @@ static int copy_settings(struct domain *cd, struct domain *d) > return rc; > } > > -static int fork(struct domain *cd, struct domain *d) > +static int fork(struct domain *cd, struct domain *d, uint16_t flags) > { > int rc = -EBUSY; > + bool block_interrupts = flags & XENMEM_FORK_BLOCK_INTERRUPTS; > + bool skip_special_pages = flags & XENMEM_FORK_SKIP_SPECIAL_PAGES; > > if ( !cd->controller_pause_count ) > return rc; > @@ -1856,7 +1860,13 @@ static int fork(struct domain *cd, struct domain *d) > if ( (rc = bring_up_vcpus(cd, d)) ) > goto done; > > - rc = copy_settings(cd, d); > + if ( !(rc = copy_settings(cd, d, skip_special_pages)) ) Can you set cd->arch.hvm.mem_sharing.{block_interrupts,skip_special_pages} earlier so that you don't need to pass the options around to copy_settings and copy_vcpu_settings? > + { > + cd->arch.hvm.mem_sharing.block_interrupts = block_interrupts; > + cd->arch.hvm.mem_sharing.skip_special_pages = skip_special_pages; > + /* skip mapping the vAPIC page on unpause if skipping special pages */ > + cd->creation_finished = skip_special_pages; I think this is dangerous. While it might be true at the moment that the arch_domain_creation_finished only maps the vAPIC page, there's no guarantee it couldn't do other stuff in the future that could be required for the VM to be started. Does it add much overhead to map the vAPIC page? Thanks, Roger.
On Thu, Mar 24, 2022 at 10:50 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > Add option to the fork memop to skip populating the fork with special pages. > > These special pages are only necessary when setting up forks to be fully > > functional with a toolstack. For short-lived forks where no toolstack is active > > these pages are uneccesary. > > I'm not sure those are strictly related to having a toolstack. For > example the vcpu_info has nothing to do with having a toolstack, and > is only used by the guest in order to receive events or fetch time > related data. So while a short-lived fork might not make use of those, > that has nothing to do with having a toolstack or not. Fair enough, the point is that the short live fork doesn't use these pages. > > > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> > > --- > > xen/arch/x86/include/asm/hvm/domain.h | 4 +++- > > xen/arch/x86/mm/mem_sharing.c | 33 +++++++++++++++++---------- > > xen/include/public/memory.h | 4 ++-- > > 3 files changed, 26 insertions(+), 15 deletions(-) > > > > diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h > > index 698455444e..446cd06411 100644 > > --- a/xen/arch/x86/include/asm/hvm/domain.h > > +++ b/xen/arch/x86/include/asm/hvm/domain.h > > @@ -31,7 +31,9 @@ > > #ifdef CONFIG_MEM_SHARING > > struct mem_sharing_domain > > { > > - bool enabled, block_interrupts; > > + bool enabled; > > + bool block_interrupts; > > + bool skip_special_pages; > > > > /* > > * When releasing shared gfn's in a preemptible manner, recall where > > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > > index 15e6a7ed81..84c04ddfa3 100644 > > --- a/xen/arch/x86/mm/mem_sharing.c > > +++ b/xen/arch/x86/mm/mem_sharing.c > > @@ -1643,7 +1643,8 @@ static int bring_up_vcpus(struct domain *cd, struct domain *d) > > return 0; > > } > > > > -static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > +static int copy_vcpu_settings(struct domain *cd, const struct domain *d, > > + bool skip_special_pages) > > { > > unsigned int i; > > struct p2m_domain *p2m = p2m_get_hostp2m(cd); > > @@ -1660,7 +1661,7 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > > > /* Copy & map in the vcpu_info page if the guest uses one */ > > vcpu_info_mfn = d_vcpu->vcpu_info_mfn; > > - if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > > + if ( !skip_special_pages && !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > > { > > mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn; > > > > @@ -1807,17 +1808,18 @@ static int copy_special_pages(struct domain *cd, struct domain *d) > > return 0; > > } > > > > -static int copy_settings(struct domain *cd, struct domain *d) > > +static int copy_settings(struct domain *cd, struct domain *d, > > + bool skip_special_pages) > > { > > int rc; > > > > - if ( (rc = copy_vcpu_settings(cd, d)) ) > > + if ( (rc = copy_vcpu_settings(cd, d, skip_special_pages)) ) > > return rc; > > > > if ( (rc = hvm_copy_context_and_params(cd, d)) ) > > return rc; > > > > - if ( (rc = copy_special_pages(cd, d)) ) > > + if ( !skip_special_pages && (rc = copy_special_pages(cd, d)) ) > > return rc; > > > > copy_tsc(cd, d); > > @@ -1826,9 +1828,11 @@ static int copy_settings(struct domain *cd, struct domain *d) > > return rc; > > } > > > > -static int fork(struct domain *cd, struct domain *d) > > +static int fork(struct domain *cd, struct domain *d, uint16_t flags) > > { > > int rc = -EBUSY; > > + bool block_interrupts = flags & XENMEM_FORK_BLOCK_INTERRUPTS; > > + bool skip_special_pages = flags & XENMEM_FORK_SKIP_SPECIAL_PAGES; > > > > if ( !cd->controller_pause_count ) > > return rc; > > @@ -1856,7 +1860,13 @@ static int fork(struct domain *cd, struct domain *d) > > if ( (rc = bring_up_vcpus(cd, d)) ) > > goto done; > > > > - rc = copy_settings(cd, d); > > + if ( !(rc = copy_settings(cd, d, skip_special_pages)) ) > > Can you set > cd->arch.hvm.mem_sharing.{block_interrupts,skip_special_pages} earlier > so that you don't need to pass the options around to copy_settings and > copy_vcpu_settings? Would be possible yes, but then we would have to clear them in case the forking failed at any point. Setting them only at the end when the fork finished ensures that those fields are only ever valid if the VM is a fork. Both are valid approaches and I prefer this over the other. > > > + { > > + cd->arch.hvm.mem_sharing.block_interrupts = block_interrupts; > > + cd->arch.hvm.mem_sharing.skip_special_pages = skip_special_pages; > > + /* skip mapping the vAPIC page on unpause if skipping special pages */ > > + cd->creation_finished = skip_special_pages; > > I think this is dangerous. While it might be true at the moment that > the arch_domain_creation_finished only maps the vAPIC page, there's no > guarantee it couldn't do other stuff in the future that could be > required for the VM to be started. I understand this domain_creation_finished route could be expanded in the future to include other stuff, but IMHO we can evaluate what to do about that when and if it becomes necessary. This is all experimental to begin with. > Does it add much overhead to map the vAPIC page? I don't have numbers but it does add overhead. When we do a fork reset we loop through all pages in the physmap to determine what needs to be removed. So having an extra page means that loop is always larger than it actually needs to be. Considering we do the reset thousands of times per second per core, you can imagine it adding up over time. Tamas
On Thu, Mar 24, 2022 at 11:15:08AM -0400, Tamas K Lengyel wrote: > On Thu, Mar 24, 2022 at 10:50 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > > Add option to the fork memop to skip populating the fork with special pages. > > > These special pages are only necessary when setting up forks to be fully > > > functional with a toolstack. For short-lived forks where no toolstack is active > > > these pages are uneccesary. > > > > I'm not sure those are strictly related to having a toolstack. For > > example the vcpu_info has nothing to do with having a toolstack, and > > is only used by the guest in order to receive events or fetch time > > related data. So while a short-lived fork might not make use of those, > > that has nothing to do with having a toolstack or not. > > Fair enough, the point is that the short live fork doesn't use these pages. > > > > > > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> > > > --- > > > xen/arch/x86/include/asm/hvm/domain.h | 4 +++- > > > xen/arch/x86/mm/mem_sharing.c | 33 +++++++++++++++++---------- > > > xen/include/public/memory.h | 4 ++-- > > > 3 files changed, 26 insertions(+), 15 deletions(-) > > > > > > diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h > > > index 698455444e..446cd06411 100644 > > > --- a/xen/arch/x86/include/asm/hvm/domain.h > > > +++ b/xen/arch/x86/include/asm/hvm/domain.h > > > @@ -31,7 +31,9 @@ > > > #ifdef CONFIG_MEM_SHARING > > > struct mem_sharing_domain > > > { > > > - bool enabled, block_interrupts; > > > + bool enabled; > > > + bool block_interrupts; > > > + bool skip_special_pages; > > > > > > /* > > > * When releasing shared gfn's in a preemptible manner, recall where > > > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > > > index 15e6a7ed81..84c04ddfa3 100644 > > > --- a/xen/arch/x86/mm/mem_sharing.c > > > +++ b/xen/arch/x86/mm/mem_sharing.c > > > @@ -1643,7 +1643,8 @@ static int bring_up_vcpus(struct domain *cd, struct domain *d) > > > return 0; > > > } > > > > > > -static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > > +static int copy_vcpu_settings(struct domain *cd, const struct domain *d, > > > + bool skip_special_pages) > > > { > > > unsigned int i; > > > struct p2m_domain *p2m = p2m_get_hostp2m(cd); > > > @@ -1660,7 +1661,7 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > > > > > /* Copy & map in the vcpu_info page if the guest uses one */ > > > vcpu_info_mfn = d_vcpu->vcpu_info_mfn; > > > - if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > > > + if ( !skip_special_pages && !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > > > { > > > mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn; > > > > > > @@ -1807,17 +1808,18 @@ static int copy_special_pages(struct domain *cd, struct domain *d) > > > return 0; > > > } > > > > > > -static int copy_settings(struct domain *cd, struct domain *d) > > > +static int copy_settings(struct domain *cd, struct domain *d, > > > + bool skip_special_pages) > > > { > > > int rc; > > > > > > - if ( (rc = copy_vcpu_settings(cd, d)) ) > > > + if ( (rc = copy_vcpu_settings(cd, d, skip_special_pages)) ) > > > return rc; > > > > > > if ( (rc = hvm_copy_context_and_params(cd, d)) ) > > > return rc; > > > > > > - if ( (rc = copy_special_pages(cd, d)) ) > > > + if ( !skip_special_pages && (rc = copy_special_pages(cd, d)) ) > > > return rc; > > > > > > copy_tsc(cd, d); > > > @@ -1826,9 +1828,11 @@ static int copy_settings(struct domain *cd, struct domain *d) > > > return rc; > > > } > > > > > > -static int fork(struct domain *cd, struct domain *d) > > > +static int fork(struct domain *cd, struct domain *d, uint16_t flags) > > > { > > > int rc = -EBUSY; > > > + bool block_interrupts = flags & XENMEM_FORK_BLOCK_INTERRUPTS; > > > + bool skip_special_pages = flags & XENMEM_FORK_SKIP_SPECIAL_PAGES; > > > > > > if ( !cd->controller_pause_count ) > > > return rc; > > > @@ -1856,7 +1860,13 @@ static int fork(struct domain *cd, struct domain *d) > > > if ( (rc = bring_up_vcpus(cd, d)) ) > > > goto done; > > > > > > - rc = copy_settings(cd, d); > > > + if ( !(rc = copy_settings(cd, d, skip_special_pages)) ) > > > > Can you set > > cd->arch.hvm.mem_sharing.{block_interrupts,skip_special_pages} earlier > > so that you don't need to pass the options around to copy_settings and > > copy_vcpu_settings? > > Would be possible yes, but then we would have to clear them in case > the forking failed at any point. Setting them only at the end when the > fork finished ensures that those fields are only ever valid if the VM > is a fork. Both are valid approaches and I prefer this over the other. I think I'm confused, doesn't the fork get destroyed if there's a failure? In which case the values cd->arch.hvm.mem_sharing.{block_interrupts,skip_special_pages} shouldn't really matter? > > > > > + { > > > + cd->arch.hvm.mem_sharing.block_interrupts = block_interrupts; > > > + cd->arch.hvm.mem_sharing.skip_special_pages = skip_special_pages; > > > + /* skip mapping the vAPIC page on unpause if skipping special pages */ > > > + cd->creation_finished = skip_special_pages; > > > > I think this is dangerous. While it might be true at the moment that > > the arch_domain_creation_finished only maps the vAPIC page, there's no > > guarantee it couldn't do other stuff in the future that could be > > required for the VM to be started. > > I understand this domain_creation_finished route could be expanded in > the future to include other stuff, but IMHO we can evaluate what to do > about that when and if it becomes necessary. This is all experimental > to begin with. Maybe you could check the skip_special_pages field from domain_creation_finished in order to decide whether the vAPIC page needs to be mapped? Thanks, Roger.
On Thu, Mar 24, 2022 at 11:51 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > On Thu, Mar 24, 2022 at 11:15:08AM -0400, Tamas K Lengyel wrote: > > On Thu, Mar 24, 2022 at 10:50 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > > > > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > > > Add option to the fork memop to skip populating the fork with special pages. > > > > These special pages are only necessary when setting up forks to be fully > > > > functional with a toolstack. For short-lived forks where no toolstack is active > > > > these pages are uneccesary. > > > > > > I'm not sure those are strictly related to having a toolstack. For > > > example the vcpu_info has nothing to do with having a toolstack, and > > > is only used by the guest in order to receive events or fetch time > > > related data. So while a short-lived fork might not make use of those, > > > that has nothing to do with having a toolstack or not. > > > > Fair enough, the point is that the short live fork doesn't use these pages. > > > > > > > > > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> > > > > --- > > > > xen/arch/x86/include/asm/hvm/domain.h | 4 +++- > > > > xen/arch/x86/mm/mem_sharing.c | 33 +++++++++++++++++---------- > > > > xen/include/public/memory.h | 4 ++-- > > > > 3 files changed, 26 insertions(+), 15 deletions(-) > > > > > > > > diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h > > > > index 698455444e..446cd06411 100644 > > > > --- a/xen/arch/x86/include/asm/hvm/domain.h > > > > +++ b/xen/arch/x86/include/asm/hvm/domain.h > > > > @@ -31,7 +31,9 @@ > > > > #ifdef CONFIG_MEM_SHARING > > > > struct mem_sharing_domain > > > > { > > > > - bool enabled, block_interrupts; > > > > + bool enabled; > > > > + bool block_interrupts; > > > > + bool skip_special_pages; > > > > > > > > /* > > > > * When releasing shared gfn's in a preemptible manner, recall where > > > > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > > > > index 15e6a7ed81..84c04ddfa3 100644 > > > > --- a/xen/arch/x86/mm/mem_sharing.c > > > > +++ b/xen/arch/x86/mm/mem_sharing.c > > > > @@ -1643,7 +1643,8 @@ static int bring_up_vcpus(struct domain *cd, struct domain *d) > > > > return 0; > > > > } > > > > > > > > -static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > > > +static int copy_vcpu_settings(struct domain *cd, const struct domain *d, > > > > + bool skip_special_pages) > > > > { > > > > unsigned int i; > > > > struct p2m_domain *p2m = p2m_get_hostp2m(cd); > > > > @@ -1660,7 +1661,7 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > > > > > > > /* Copy & map in the vcpu_info page if the guest uses one */ > > > > vcpu_info_mfn = d_vcpu->vcpu_info_mfn; > > > > - if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > > > > + if ( !skip_special_pages && !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) > > > > { > > > > mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn; > > > > > > > > @@ -1807,17 +1808,18 @@ static int copy_special_pages(struct domain *cd, struct domain *d) > > > > return 0; > > > > } > > > > > > > > -static int copy_settings(struct domain *cd, struct domain *d) > > > > +static int copy_settings(struct domain *cd, struct domain *d, > > > > + bool skip_special_pages) > > > > { > > > > int rc; > > > > > > > > - if ( (rc = copy_vcpu_settings(cd, d)) ) > > > > + if ( (rc = copy_vcpu_settings(cd, d, skip_special_pages)) ) > > > > return rc; > > > > > > > > if ( (rc = hvm_copy_context_and_params(cd, d)) ) > > > > return rc; > > > > > > > > - if ( (rc = copy_special_pages(cd, d)) ) > > > > + if ( !skip_special_pages && (rc = copy_special_pages(cd, d)) ) > > > > return rc; > > > > > > > > copy_tsc(cd, d); > > > > @@ -1826,9 +1828,11 @@ static int copy_settings(struct domain *cd, struct domain *d) > > > > return rc; > > > > } > > > > > > > > -static int fork(struct domain *cd, struct domain *d) > > > > +static int fork(struct domain *cd, struct domain *d, uint16_t flags) > > > > { > > > > int rc = -EBUSY; > > > > + bool block_interrupts = flags & XENMEM_FORK_BLOCK_INTERRUPTS; > > > > + bool skip_special_pages = flags & XENMEM_FORK_SKIP_SPECIAL_PAGES; > > > > > > > > if ( !cd->controller_pause_count ) > > > > return rc; > > > > @@ -1856,7 +1860,13 @@ static int fork(struct domain *cd, struct domain *d) > > > > if ( (rc = bring_up_vcpus(cd, d)) ) > > > > goto done; > > > > > > > > - rc = copy_settings(cd, d); > > > > + if ( !(rc = copy_settings(cd, d, skip_special_pages)) ) > > > > > > Can you set > > > cd->arch.hvm.mem_sharing.{block_interrupts,skip_special_pages} earlier > > > so that you don't need to pass the options around to copy_settings and > > > copy_vcpu_settings? > > > > Would be possible yes, but then we would have to clear them in case > > the forking failed at any point. Setting them only at the end when the > > fork finished ensures that those fields are only ever valid if the VM > > is a fork. Both are valid approaches and I prefer this over the other. > > I think I'm confused, doesn't the fork get destroyed if there's a > failure? In which case the values > cd->arch.hvm.mem_sharing.{block_interrupts,skip_special_pages} > shouldn't really matter? No, the domain that will be a fork is just a regular domain until the fork operation completes. If the fork operation fails the domain remains. > > > > > > > + { > > > > + cd->arch.hvm.mem_sharing.block_interrupts = block_interrupts; > > > > + cd->arch.hvm.mem_sharing.skip_special_pages = skip_special_pages; > > > > + /* skip mapping the vAPIC page on unpause if skipping special pages */ > > > > + cd->creation_finished = skip_special_pages; > > > > > > I think this is dangerous. While it might be true at the moment that > > > the arch_domain_creation_finished only maps the vAPIC page, there's no > > > guarantee it couldn't do other stuff in the future that could be > > > required for the VM to be started. > > > > I understand this domain_creation_finished route could be expanded in > > the future to include other stuff, but IMHO we can evaluate what to do > > about that when and if it becomes necessary. This is all experimental > > to begin with. > > Maybe you could check the skip_special_pages field from > domain_creation_finished in order to decide whether the vAPIC page > needs to be mapped? Could certainly do that but it means adding experimental code in an #ifdef to the vAPIC mapping logic. Technically nothing wrong with that but I would prefer keeping all this code in a single-place if possible. Tamas
On Thu, Mar 24, 2022 at 12:27:02PM -0400, Tamas K Lengyel wrote: > On Thu, Mar 24, 2022 at 11:51 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > > > On Thu, Mar 24, 2022 at 11:15:08AM -0400, Tamas K Lengyel wrote: > > > On Thu, Mar 24, 2022 at 10:50 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > > > > > > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > > > > + { > > > > > + cd->arch.hvm.mem_sharing.block_interrupts = block_interrupts; > > > > > + cd->arch.hvm.mem_sharing.skip_special_pages = skip_special_pages; > > > > > + /* skip mapping the vAPIC page on unpause if skipping special pages */ > > > > > + cd->creation_finished = skip_special_pages; > > > > > > > > I think this is dangerous. While it might be true at the moment that > > > > the arch_domain_creation_finished only maps the vAPIC page, there's no > > > > guarantee it couldn't do other stuff in the future that could be > > > > required for the VM to be started. > > > > > > I understand this domain_creation_finished route could be expanded in > > > the future to include other stuff, but IMHO we can evaluate what to do > > > about that when and if it becomes necessary. This is all experimental > > > to begin with. > > > > Maybe you could check the skip_special_pages field from > > domain_creation_finished in order to decide whether the vAPIC page > > needs to be mapped? > > Could certainly do that but it means adding experimental code in an > #ifdef to the vAPIC mapping logic. Technically nothing wrong with that > but I would prefer keeping all this code in a single-place if > possible. I see, while I agree it's best to keep the code contained when possible, I think in this case it's better to modify the hook, specially because further changes to domain_creation_finished will easily spot that this path is special cases for VM forks. While the code is experimental it doesn't mean it shouldn't be properly integrated with the rest of the code base. Thanks, Roger.
On Fri, Mar 25, 2022, 6:25 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > On Thu, Mar 24, 2022 at 12:27:02PM -0400, Tamas K Lengyel wrote: > > On Thu, Mar 24, 2022 at 11:51 AM Roger Pau Monné <roger.pau@citrix.com> > wrote: > > > > > > On Thu, Mar 24, 2022 at 11:15:08AM -0400, Tamas K Lengyel wrote: > > > > On Thu, Mar 24, 2022 at 10:50 AM Roger Pau Monné < > roger.pau@citrix.com> wrote: > > > > > > > > > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > > > > > + { > > > > > > + cd->arch.hvm.mem_sharing.block_interrupts = > block_interrupts; > > > > > > + cd->arch.hvm.mem_sharing.skip_special_pages = > skip_special_pages; > > > > > > + /* skip mapping the vAPIC page on unpause if skipping > special pages */ > > > > > > + cd->creation_finished = skip_special_pages; > > > > > > > > > > I think this is dangerous. While it might be true at the moment > that > > > > > the arch_domain_creation_finished only maps the vAPIC page, > there's no > > > > > guarantee it couldn't do other stuff in the future that could be > > > > > required for the VM to be started. > > > > > > > > I understand this domain_creation_finished route could be expanded in > > > > the future to include other stuff, but IMHO we can evaluate what to > do > > > > about that when and if it becomes necessary. This is all experimental > > > > to begin with. > > > > > > Maybe you could check the skip_special_pages field from > > > domain_creation_finished in order to decide whether the vAPIC page > > > needs to be mapped? > > > > Could certainly do that but it means adding experimental code in an > > #ifdef to the vAPIC mapping logic. Technically nothing wrong with that > > but I would prefer keeping all this code in a single-place if > > possible. > > I see, while I agree it's best to keep the code contained when > possible, I think in this case it's better to modify the hook, > specially because further changes to domain_creation_finished will > easily spot that this path is special cases for VM forks. > > While the code is experimental it doesn't mean it shouldn't be > properly integrated with the rest of the code base. > Sure, I'm fine with moving it there. Tamas >
On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > Add option to the fork memop to skip populating the fork with special pages. > These special pages are only necessary when setting up forks to be fully > functional with a toolstack. For short-lived forks where no toolstack is active > these pages are uneccesary. Replying here because there's no cover letter AFAICT. For this kind of performance related changes it would be better if you could provide some figures about the performance impact. It would help if we knew whether avoiding mapping the vAPIC page means you can create 0.1% more forks per-minute or 20%. If you really want to speed up the forking path it might be good to start by perf sampling Xen in order to find the bottlenecks? Thanks, Roger.
On Fri, Mar 25, 2022, 6:59 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > Add option to the fork memop to skip populating the fork with special > pages. > > These special pages are only necessary when setting up forks to be fully > > functional with a toolstack. For short-lived forks where no toolstack is > active > > these pages are uneccesary. > > Replying here because there's no cover letter AFAICT. > > For this kind of performance related changes it would be better if you > could provide some figures about the performance impact. It would help > if we knew whether avoiding mapping the vAPIC page means you can > create 0.1% more forks per-minute or 20%. > > If you really want to speed up the forking path it might be good to start > by perf sampling Xen in order to find the bottlenecks? > Sure but for experiment systems I don't think its necessary to collect that data. There is also a non-performance reason why we want to keep special pages from being populated, in cases we really want the forks physmap to start empty for better control over its state. There was already a case where having special pages mapped in ended up triggering unexpected Xen behaviors leading to chain of events not easy to follow. For example if page 0 gets brought in while the vCPU is being created it ends up as a misconfigured ept entry if nested virtualization is enabled. That leads to ept misconfiguration exits instead of epf faults. Simply enforcing no entry in the physmap until forking is complete eliminates the chance of something like that happening again and makes reasoning about the VM's behavior from the start easier. Tamas >
On Fri, Mar 25, 2022 at 07:15:59AM -0400, Tamas K Lengyel wrote: > On Fri, Mar 25, 2022, 6:59 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > > Add option to the fork memop to skip populating the fork with special > > pages. > > > These special pages are only necessary when setting up forks to be fully > > > functional with a toolstack. For short-lived forks where no toolstack is > > active > > > these pages are uneccesary. > > > > Replying here because there's no cover letter AFAICT. > > > > For this kind of performance related changes it would be better if you > > could provide some figures about the performance impact. It would help > > if we knew whether avoiding mapping the vAPIC page means you can > > create 0.1% more forks per-minute or 20%. > > > > If you really want to speed up the forking path it might be good to start > > by perf sampling Xen in order to find the bottlenecks? > > > > Sure but for experiment systems I don't think its necessary to collect that > data. It helps weight whether the extra logic is worth the performance benefit IMO. Here it might not matter that much since you say there's also a non-performance reason for the change. > There is also a non-performance reason why we want to keep special pages > from being populated, in cases we really want the forks physmap to start > empty for better control over its state. There was already a case where > having special pages mapped in ended up triggering unexpected Xen behaviors > leading to chain of events not easy to follow. For example if page 0 gets > brought in while the vCPU is being created it ends up as a misconfigured > ept entry if nested virtualization is enabled. That leads to ept > misconfiguration exits instead of epf faults. Simply enforcing no entry in > the physmap until forking is complete eliminates the chance of something > like that happening again and makes reasoning about the VM's behavior from > the start easier. Could we have this added to the commit message then, and the option renamed to 'empty_p2m' or some such. Then you should also ASSERT that at the end of the fork process the p2m is indeed empty, not sure if checking d->arch.paging.hap.p2m_pages == 0 would accomplish that? Thanks, Roger.
On Fri, Mar 25, 2022, 7:31 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > On Fri, Mar 25, 2022 at 07:15:59AM -0400, Tamas K Lengyel wrote: > > On Fri, Mar 25, 2022, 6:59 AM Roger Pau Monné <roger.pau@citrix.com> > wrote: > > > > > On Tue, Mar 22, 2022 at 01:41:37PM -0400, Tamas K Lengyel wrote: > > > > Add option to the fork memop to skip populating the fork with special > > > pages. > > > > These special pages are only necessary when setting up forks to be > fully > > > > functional with a toolstack. For short-lived forks where no > toolstack is > > > active > > > > these pages are uneccesary. > > > > > > Replying here because there's no cover letter AFAICT. > > > > > > For this kind of performance related changes it would be better if you > > > could provide some figures about the performance impact. It would help > > > if we knew whether avoiding mapping the vAPIC page means you can > > > create 0.1% more forks per-minute or 20%. > > > > > > If you really want to speed up the forking path it might be good to > start > > > by perf sampling Xen in order to find the bottlenecks? > > > > > > > Sure but for experiment systems I don't think its necessary to collect > that > > data. > > It helps weight whether the extra logic is worth the performance > benefit IMO. Here it might not matter that much since you say there's > also a non-performance reason for the change. > > > There is also a non-performance reason why we want to keep special pages > > from being populated, in cases we really want the forks physmap to start > > empty for better control over its state. There was already a case where > > having special pages mapped in ended up triggering unexpected Xen > behaviors > > leading to chain of events not easy to follow. For example if page 0 gets > > brought in while the vCPU is being created it ends up as a misconfigured > > ept entry if nested virtualization is enabled. That leads to ept > > misconfiguration exits instead of epf faults. Simply enforcing no entry > in > > the physmap until forking is complete eliminates the chance of something > > like that happening again and makes reasoning about the VM's behavior > from > > the start easier. > > Could we have this added to the commit message then, and the option > renamed to 'empty_p2m' or some such. Then you should also ASSERT that > at the end of the fork process the p2m is indeed empty, not sure if > checking d->arch.paging.hap.p2m_pages == 0 would accomplish that? > Sure, I can do that. Thanks, Tamas >
diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h index 698455444e..446cd06411 100644 --- a/xen/arch/x86/include/asm/hvm/domain.h +++ b/xen/arch/x86/include/asm/hvm/domain.h @@ -31,7 +31,9 @@ #ifdef CONFIG_MEM_SHARING struct mem_sharing_domain { - bool enabled, block_interrupts; + bool enabled; + bool block_interrupts; + bool skip_special_pages; /* * When releasing shared gfn's in a preemptible manner, recall where diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c index 15e6a7ed81..84c04ddfa3 100644 --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -1643,7 +1643,8 @@ static int bring_up_vcpus(struct domain *cd, struct domain *d) return 0; } -static int copy_vcpu_settings(struct domain *cd, const struct domain *d) +static int copy_vcpu_settings(struct domain *cd, const struct domain *d, + bool skip_special_pages) { unsigned int i; struct p2m_domain *p2m = p2m_get_hostp2m(cd); @@ -1660,7 +1661,7 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) /* Copy & map in the vcpu_info page if the guest uses one */ vcpu_info_mfn = d_vcpu->vcpu_info_mfn; - if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) + if ( !skip_special_pages && !mfn_eq(vcpu_info_mfn, INVALID_MFN) ) { mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn; @@ -1807,17 +1808,18 @@ static int copy_special_pages(struct domain *cd, struct domain *d) return 0; } -static int copy_settings(struct domain *cd, struct domain *d) +static int copy_settings(struct domain *cd, struct domain *d, + bool skip_special_pages) { int rc; - if ( (rc = copy_vcpu_settings(cd, d)) ) + if ( (rc = copy_vcpu_settings(cd, d, skip_special_pages)) ) return rc; if ( (rc = hvm_copy_context_and_params(cd, d)) ) return rc; - if ( (rc = copy_special_pages(cd, d)) ) + if ( !skip_special_pages && (rc = copy_special_pages(cd, d)) ) return rc; copy_tsc(cd, d); @@ -1826,9 +1828,11 @@ static int copy_settings(struct domain *cd, struct domain *d) return rc; } -static int fork(struct domain *cd, struct domain *d) +static int fork(struct domain *cd, struct domain *d, uint16_t flags) { int rc = -EBUSY; + bool block_interrupts = flags & XENMEM_FORK_BLOCK_INTERRUPTS; + bool skip_special_pages = flags & XENMEM_FORK_SKIP_SPECIAL_PAGES; if ( !cd->controller_pause_count ) return rc; @@ -1856,7 +1860,13 @@ static int fork(struct domain *cd, struct domain *d) if ( (rc = bring_up_vcpus(cd, d)) ) goto done; - rc = copy_settings(cd, d); + if ( !(rc = copy_settings(cd, d, skip_special_pages)) ) + { + cd->arch.hvm.mem_sharing.block_interrupts = block_interrupts; + cd->arch.hvm.mem_sharing.skip_special_pages = skip_special_pages; + /* skip mapping the vAPIC page on unpause if skipping special pages */ + cd->creation_finished = skip_special_pages; + } done: if ( rc && rc != -ERESTART ) @@ -1920,7 +1930,7 @@ static int mem_sharing_fork_reset(struct domain *d) } spin_unlock_recursive(&d->page_alloc_lock); - rc = copy_settings(d, pd); + rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages); domain_unpause(d); @@ -2190,7 +2200,8 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) if ( mso.u.fork.pad ) goto out; if ( mso.u.fork.flags & - ~(XENMEM_FORK_WITH_IOMMU_ALLOWED | XENMEM_FORK_BLOCK_INTERRUPTS) ) + ~(XENMEM_FORK_WITH_IOMMU_ALLOWED | XENMEM_FORK_BLOCK_INTERRUPTS | + XENMEM_FORK_SKIP_SPECIAL_PAGES) ) goto out; rc = rcu_lock_live_remote_domain_by_id(mso.u.fork.parent_domain, @@ -2212,14 +2223,12 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg) goto out; } - rc = fork(d, pd); + rc = fork(d, pd, mso.u.fork.flags); if ( rc == -ERESTART ) rc = hypercall_create_continuation(__HYPERVISOR_memory_op, "lh", XENMEM_sharing_op, arg); - else if ( !rc && (mso.u.fork.flags & XENMEM_FORK_BLOCK_INTERRUPTS) ) - d->arch.hvm.mem_sharing.block_interrupts = true; rcu_unlock_domain(pd); break; diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h index a1a0f0233a..208d8dcbd9 100644 --- a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -543,10 +543,10 @@ struct xen_mem_sharing_op { } debug; struct mem_sharing_op_fork { /* OP_FORK */ domid_t parent_domain; /* IN: parent's domain id */ -/* Only makes sense for short-lived forks */ +/* These flags only makes sense for short-lived forks */ #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0) -/* Only makes sense for short-lived forks */ #define XENMEM_FORK_BLOCK_INTERRUPTS (1u << 1) +#define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2) uint16_t flags; /* IN: optional settings */ uint32_t pad; /* Must be set to 0 */ } fork;
Add option to the fork memop to skip populating the fork with special pages. These special pages are only necessary when setting up forks to be fully functional with a toolstack. For short-lived forks where no toolstack is active these pages are uneccesary. Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> --- xen/arch/x86/include/asm/hvm/domain.h | 4 +++- xen/arch/x86/mm/mem_sharing.c | 33 +++++++++++++++++---------- xen/include/public/memory.h | 4 ++-- 3 files changed, 26 insertions(+), 15 deletions(-)