[v2,39/55] x86: switch root_pgt to mfn_t and use new APIs

Message ID f8b0b15146c357270fb0978f3ec50eea4695dc1c.1569833766.git.hongyax@amazon.com (mailing list archive)
State New, archived
Series Switch to domheap for Xen PTEs

Commit Message

Xia, Hongyan Sept. 30, 2019, 10:33 a.m. UTC
From: Wei Liu <wei.liu2@citrix.com>

This then requires moving the declaration of the root page table mfn
into mm.h and modifying setup_cpu_root_pgt to have a single exit path.

We also need to force map_domain_page to use the direct map when
switching per-domain mappings. This is contrary to our end goal of
removing the direct map, but the override will be removed once we make
map_domain_page context-switch safe in another (large) patch series.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 xen/arch/x86/domain.c           | 15 ++++++++++---
 xen/arch/x86/domain_page.c      |  2 +-
 xen/arch/x86/mm.c               |  2 +-
 xen/arch/x86/pv/domain.c        |  2 +-
 xen/arch/x86/smpboot.c          | 40 ++++++++++++++++++++++-----------
 xen/include/asm-x86/mm.h        |  2 ++
 xen/include/asm-x86/processor.h |  2 +-
 7 files changed, 45 insertions(+), 20 deletions(-)
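
For reference, this is how paravirt_ctxt_switch_to() reads with the
change applied, reconstructed directly from the hunk below; only the
comment on the override is added here:

    void paravirt_ctxt_switch_to(struct vcpu *v)
    {
        mfn_t rpt_mfn = this_cpu(root_pgt_mfn);

        if ( !mfn_eq(rpt_mfn, INVALID_MFN) )
        {
            root_pgentry_t *rpt;

            /*
             * Force map_domain_page() onto the direct map: the
             * per-domain mappings are exactly what is being switched
             * here, so the per-domain mapcache cannot be used yet.
             */
            mapcache_override_current(INVALID_VCPU);
            rpt = map_xen_pagetable_new(rpt_mfn);
            rpt[root_table_offset(PERDOMAIN_VIRT_START)] =
                l4e_from_page(v->domain->arch.perdomain_l3_pg,
                              __PAGE_HYPERVISOR_RW);
            UNMAP_XEN_PAGETABLE_NEW(rpt);
            mapcache_override_current(NULL);
        }

        if ( unlikely(v->arch.dr7 & DR7_ACTIVE_MASK) )
            activate_debugregs(v);
    }

The map/unmap pair replaces the previous permanently mapped root_pgt
pointer, which is the point of switching the per-CPU variable to mfn_t.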

Comments

Xia, Hongyan Oct. 1, 2019, 1:54 p.m. UTC | #1
On 30/09/2019 11:33, Hongyan Xia wrote:
> From: Wei Liu <wei.liu2@citrix.com>
> 
> This then requires moving the declaration of the root page table mfn
> into mm.h and modifying setup_cpu_root_pgt to have a single exit path.
> 
> We also need to force map_domain_page to use the direct map when
> switching per-domain mappings. This is contrary to our end goal of
> removing the direct map, but the override will be removed once we make
> map_domain_page context-switch safe in another (large) patch series.
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>   xen/arch/x86/domain.c           | 15 ++++++++++---
>   xen/arch/x86/domain_page.c      |  2 +-
>   xen/arch/x86/mm.c               |  2 +-
>   xen/arch/x86/pv/domain.c        |  2 +-
>   xen/arch/x86/smpboot.c          | 40 ++++++++++++++++++++++-----------
>   xen/include/asm-x86/mm.h        |  2 ++
>   xen/include/asm-x86/processor.h |  2 +-
>   7 files changed, 45 insertions(+), 20 deletions(-)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index dbdf6b1bc2..e9bf47efce 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -69,6 +69,7 @@
>   #include <asm/pv/domain.h>
>   #include <asm/pv/mm.h>
>   #include <asm/spec_ctrl.h>
> +#include <asm/setup.h>
>   
>   DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
>   
> @@ -1580,12 +1581,20 @@ void paravirt_ctxt_switch_from(struct vcpu *v)
>   
>   void paravirt_ctxt_switch_to(struct vcpu *v)
>   {
> -    root_pgentry_t *root_pgt = this_cpu(root_pgt);
> +    mfn_t rpt_mfn = this_cpu(root_pgt_mfn);
>   
> -    if ( root_pgt )
> -        root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
> +    if ( !mfn_eq(rpt_mfn, INVALID_MFN) )
> +    {
> +        root_pgentry_t *rpt;
> +
> +        mapcache_override_current(INVALID_VCPU);
> +        rpt = map_xen_pagetable_new(rpt_mfn);
> +        rpt[root_table_offset(PERDOMAIN_VIRT_START)] =
>               l4e_from_page(v->domain->arch.perdomain_l3_pg,
>                             __PAGE_HYPERVISOR_RW);
> +        UNMAP_XEN_PAGETABLE_NEW(rpt);
> +        mapcache_override_current(NULL);
> +    }
>   
>       if ( unlikely(v->arch.dr7 & DR7_ACTIVE_MASK) )
>           activate_debugregs(v);

I am having second thoughts on whether I should include this patch for now. 
Obviously the per-domain mapcache in its current form cannot be used here 
during the context switch. However, I also don't want to use PMAP because it is 
just a bootstrapping mechanism and may result in heavy lock contention here.

I am inclined to drop it for now and include this after we have a 
context-switch safe mapping mechanism, as the commit message suggests.

Hongyan
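
The override works because map_domain_page() falls back to the direct
map when it cannot find a usable vCPU mapcache. A minimal sketch of that
fallback, assuming INVALID_VCPU is the sentinel introduced earlier in
this series (map_domain_page_sketch is a hypothetical name used purely
for illustration, not the literal Xen code):

    static DEFINE_PER_CPU(struct vcpu *, override);

    void mapcache_override_current(struct vcpu *v)
    {
        this_cpu(override) = v;
    }

    void *map_domain_page_sketch(mfn_t mfn)
    {
        /* The override, when set, replaces "current" for mapcache
         * lookups. */
        struct vcpu *v = this_cpu(override) ?: current;

        /* INVALID_VCPU (or a non-PV vCPU) means no per-domain mapcache
         * is usable, so fall back to a direct map address. */
        if ( v == INVALID_VCPU || !is_pv_vcpu(v) )
            return mfn_to_virt(mfn_x(mfn));

        /* ... normal per-domain mapcache slot allocation elided ... */
        return NULL;
    }
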
Wei Liu Oct. 1, 2019, 3:20 p.m. UTC | #2
On Tue, Oct 01, 2019 at 02:54:19PM +0100, Hongyan Xia wrote:
> On 30/09/2019 11:33, Hongyan Xia wrote:
> > From: Wei Liu <wei.liu2@citrix.com>
> > 
> > This then requires moving the declaration of the root page table mfn
> > into mm.h and modifying setup_cpu_root_pgt to have a single exit path.
> > 
> > We also need to force map_domain_page to use the direct map when
> > switching per-domain mappings. This is contrary to our end goal of
> > removing the direct map, but the override will be removed once we make
> > map_domain_page context-switch safe in another (large) patch series.
> > 
> > Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> > ---
> >   xen/arch/x86/domain.c           | 15 ++++++++++---
> >   xen/arch/x86/domain_page.c      |  2 +-
> >   xen/arch/x86/mm.c               |  2 +-
> >   xen/arch/x86/pv/domain.c        |  2 +-
> >   xen/arch/x86/smpboot.c          | 40 ++++++++++++++++++++++-----------
> >   xen/include/asm-x86/mm.h        |  2 ++
> >   xen/include/asm-x86/processor.h |  2 +-
> >   7 files changed, 45 insertions(+), 20 deletions(-)
> > 
> > diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> > index dbdf6b1bc2..e9bf47efce 100644
> > --- a/xen/arch/x86/domain.c
> > +++ b/xen/arch/x86/domain.c
> > @@ -69,6 +69,7 @@
> >   #include <asm/pv/domain.h>
> >   #include <asm/pv/mm.h>
> >   #include <asm/spec_ctrl.h>
> > +#include <asm/setup.h>
> >   DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
> > @@ -1580,12 +1581,20 @@ void paravirt_ctxt_switch_from(struct vcpu *v)
> >   void paravirt_ctxt_switch_to(struct vcpu *v)
> >   {
> > -    root_pgentry_t *root_pgt = this_cpu(root_pgt);
> > +    mfn_t rpt_mfn = this_cpu(root_pgt_mfn);
> > -    if ( root_pgt )
> > -        root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
> > +    if ( !mfn_eq(rpt_mfn, INVALID_MFN) )
> > +    {
> > +        root_pgentry_t *rpt;
> > +
> > +        mapcache_override_current(INVALID_VCPU);
> > +        rpt = map_xen_pagetable_new(rpt_mfn);
> > +        rpt[root_table_offset(PERDOMAIN_VIRT_START)] =
> >               l4e_from_page(v->domain->arch.perdomain_l3_pg,
> >                             __PAGE_HYPERVISOR_RW);
> > +        UNMAP_XEN_PAGETABLE_NEW(rpt);
> > +        mapcache_override_current(NULL);
> > +    }
> >       if ( unlikely(v->arch.dr7 & DR7_ACTIVE_MASK) )
> >           activate_debugregs(v);
> 
> I am having second thoughts on whether I should include this patch for now.
> Obviously the per-domain mapcache in its current form cannot be used here
> during the context switch. However, I also don't want to use PMAP because it
> is just a bootstrapping mechanism and may result in heavy lock contention
> here.
> 
> I am inclined to drop it for now and include this after we have a
> context-switch safe mapping mechanism, as the commit message suggests.
> 

Dropping this patch is of course fine. Then you need to consider how to
make the rest of the series remain applicable to staging.

I guess the plan in the short term is to keep a global mapping for each
root page table, right?

Wei.
Xia, Hongyan Oct. 1, 2019, 3:31 p.m. UTC | #3
On 01/10/2019 16:20, Wei Liu wrote:
> On Tue, Oct 01, 2019 at 02:54:19PM +0100, Hongyan Xia wrote:
>> On 30/09/2019 11:33, Hongyan Xia wrote:
>>> From: Wei Liu <wei.liu2@citrix.com>
>>>
>>> This then requires moving the declaration of the root page table mfn
>>> into mm.h and modifying setup_cpu_root_pgt to have a single exit path.
>>>
>>> We also need to force map_domain_page to use the direct map when
>>> switching per-domain mappings. This is contrary to our end goal of
>>> removing the direct map, but the override will be removed once we make
>>> map_domain_page context-switch safe in another (large) patch series.
>>>
>>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>>> ---
>>>    xen/arch/x86/domain.c           | 15 ++++++++++---
>>>    xen/arch/x86/domain_page.c      |  2 +-
>>>    xen/arch/x86/mm.c               |  2 +-
>>>    xen/arch/x86/pv/domain.c        |  2 +-
>>>    xen/arch/x86/smpboot.c          | 40 ++++++++++++++++++++++-----------
>>>    xen/include/asm-x86/mm.h        |  2 ++
>>>    xen/include/asm-x86/processor.h |  2 +-
>>>    7 files changed, 45 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>>> index dbdf6b1bc2..e9bf47efce 100644
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -69,6 +69,7 @@
>>>    #include <asm/pv/domain.h>
>>>    #include <asm/pv/mm.h>
>>>    #include <asm/spec_ctrl.h>
>>> +#include <asm/setup.h>
>>>    DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
>>> @@ -1580,12 +1581,20 @@ void paravirt_ctxt_switch_from(struct vcpu *v)
>>>    void paravirt_ctxt_switch_to(struct vcpu *v)
>>>    {
>>> -    root_pgentry_t *root_pgt = this_cpu(root_pgt);
>>> +    mfn_t rpt_mfn = this_cpu(root_pgt_mfn);
>>> -    if ( root_pgt )
>>> -        root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
>>> +    if ( !mfn_eq(rpt_mfn, INVALID_MFN) )
>>> +    {
>>> +        root_pgentry_t *rpt;
>>> +
>>> +        mapcache_override_current(INVALID_VCPU);
>>> +        rpt = map_xen_pagetable_new(rpt_mfn);
>>> +        rpt[root_table_offset(PERDOMAIN_VIRT_START)] =
>>>                l4e_from_page(v->domain->arch.perdomain_l3_pg,
>>>                              __PAGE_HYPERVISOR_RW);
>>> +        UNMAP_XEN_PAGETABLE_NEW(rpt);
>>> +        mapcache_override_current(NULL);
>>> +    }
>>>        if ( unlikely(v->arch.dr7 & DR7_ACTIVE_MASK) )
>>>            activate_debugregs(v);
>>
>> I am having second thoughts on whether I should include this patch for now.
>> Obviously the per-domain mapcache in its current form cannot be used here
>> during the context switch. However, I also don't want to use PMAP because it
>> is just a bootstrapping mechanism and may result in heavy lock contention
>> here.
>>
>> I am inclined to drop it for now and include this after we have a
>> context-switch safe mapping mechanism, as the commit message suggests.
>>
> 
> Dropping this patch is of course fine. Then you need to consider how to
> make the rest of the series remain applicable to staging.

I will make sure the series still applies after dropping it.

> 
> I guess the plan in the short term is to keep a global mapping for each
> root page table, right?

Yes. I have changed rpts to be xenheap pages in my next revision, which so far 
works happily without the direct map.

> 
> Wei.
>
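
Xenheap pages keep a permanent mapping even once most of the direct map
is gone, so allocating each root page table from the xenheap is one way
to provide the global mapping discussed above. A rough sketch of what
the allocation in setup_cpu_root_pgt() might become under that scheme
(the actual next revision may differ):

    /*
     * Xenheap pages remain mapped for their whole lifetime, so the
     * context-switch path can use a plain pointer again and needs no
     * map/unmap pair.
     */
    root_pgentry_t *rpt = alloc_xenheap_page();

    if ( !rpt )
        return -ENOMEM;

    clear_page(rpt);
    per_cpu(root_pgt, cpu) = rpt;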

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index dbdf6b1bc2..e9bf47efce 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -69,6 +69,7 @@ 
 #include <asm/pv/domain.h>
 #include <asm/pv/mm.h>
 #include <asm/spec_ctrl.h>
+#include <asm/setup.h>
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
 
@@ -1580,12 +1581,20 @@  void paravirt_ctxt_switch_from(struct vcpu *v)
 
 void paravirt_ctxt_switch_to(struct vcpu *v)
 {
-    root_pgentry_t *root_pgt = this_cpu(root_pgt);
+    mfn_t rpt_mfn = this_cpu(root_pgt_mfn);
 
-    if ( root_pgt )
-        root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
+    if ( !mfn_eq(rpt_mfn, INVALID_MFN) )
+    {
+        root_pgentry_t *rpt;
+
+        mapcache_override_current(INVALID_VCPU);
+        rpt = map_xen_pagetable_new(rpt_mfn);
+        rpt[root_table_offset(PERDOMAIN_VIRT_START)] =
             l4e_from_page(v->domain->arch.perdomain_l3_pg,
                           __PAGE_HYPERVISOR_RW);
+        UNMAP_XEN_PAGETABLE_NEW(rpt);
+        mapcache_override_current(NULL);
+    }
 
     if ( unlikely(v->arch.dr7 & DR7_ACTIVE_MASK) )
         activate_debugregs(v);
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 24083e9a86..cfcffd35f3 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -57,7 +57,7 @@  static inline struct vcpu *mapcache_current_vcpu(void)
     return v;
 }
 
-void __init mapcache_override_current(struct vcpu *v)
+void mapcache_override_current(struct vcpu *v)
 {
     this_cpu(override) = v;
 }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 8706dc0174..5c1d65d267 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -530,7 +530,7 @@  void write_ptbase(struct vcpu *v)
     if ( is_pv_vcpu(v) && v->domain->arch.pv.xpti )
     {
         cpu_info->root_pgt_changed = true;
-        cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
+        cpu_info->pv_cr3 = mfn_to_maddr(this_cpu(root_pgt_mfn));
         if ( new_cr4 & X86_CR4_PCIDE )
             cpu_info->pv_cr3 |= get_pcid_bits(v, true);
         switch_cr3_cr4(v->arch.cr3, new_cr4);
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 4b6f48dea2..7e70690f03 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -360,7 +360,7 @@  static void _toggle_guest_pt(struct vcpu *v)
     if ( d->arch.pv.xpti )
     {
         cpu_info->root_pgt_changed = true;
-        cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
+        cpu_info->pv_cr3 = mfn_to_maddr(this_cpu(root_pgt_mfn)) |
                            (d->arch.pv.pcid ? get_pcid_bits(v, true) : 0);
     }
 
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index c55aaa65a2..ca8fc6d485 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -811,7 +811,7 @@  static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
     return rc;
 }
 
-DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
+DEFINE_PER_CPU(mfn_t, root_pgt_mfn);
 
 static root_pgentry_t common_pgt;
 
@@ -819,19 +819,27 @@  extern const char _stextentry[], _etextentry[];
 
 static int setup_cpu_root_pgt(unsigned int cpu)
 {
-    root_pgentry_t *rpt;
+    root_pgentry_t *rpt = NULL;
+    mfn_t rpt_mfn;
     unsigned int off;
     int rc;
 
     if ( !opt_xpti_hwdom && !opt_xpti_domu )
-        return 0;
+    {
+        rc = 0;
+        goto out;
+    }
 
-    rpt = alloc_xen_pagetable();
-    if ( !rpt )
-        return -ENOMEM;
+    rpt_mfn = alloc_xen_pagetable_new();
+    if ( mfn_eq(rpt_mfn, INVALID_MFN) )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
 
+    rpt = map_xen_pagetable_new(rpt_mfn);
     clear_page(rpt);
-    per_cpu(root_pgt, cpu) = rpt;
+    per_cpu(root_pgt_mfn, cpu) = rpt_mfn;
 
     rpt[root_table_offset(RO_MPT_VIRT_START)] =
         idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
@@ -848,7 +856,7 @@  static int setup_cpu_root_pgt(unsigned int cpu)
             rc = clone_mapping(ptr, rpt);
 
         if ( rc )
-            return rc;
+            goto out;
 
         common_pgt = rpt[root_table_offset(XEN_VIRT_START)];
     }
@@ -873,19 +881,24 @@  static int setup_cpu_root_pgt(unsigned int cpu)
     if ( !rc )
         rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
 
+ out:
+    UNMAP_XEN_PAGETABLE_NEW(rpt);
     return rc;
 }
 
 static void cleanup_cpu_root_pgt(unsigned int cpu)
 {
-    root_pgentry_t *rpt = per_cpu(root_pgt, cpu);
+    mfn_t rpt_mfn = per_cpu(root_pgt_mfn, cpu);
+    root_pgentry_t *rpt;
     unsigned int r;
     unsigned long stub_linear = per_cpu(stubs.addr, cpu);
 
-    if ( !rpt )
+    if ( mfn_eq(rpt_mfn, INVALID_MFN) )
         return;
 
-    per_cpu(root_pgt, cpu) = NULL;
+    per_cpu(root_pgt_mfn, cpu) = INVALID_MFN;
+
+    rpt = map_xen_pagetable_new(rpt_mfn);
 
     for ( r = root_table_offset(DIRECTMAP_VIRT_START);
           r < root_table_offset(HYPERVISOR_VIRT_END); ++r )
@@ -930,7 +943,8 @@  static void cleanup_cpu_root_pgt(unsigned int cpu)
         free_xen_pagetable_new(l3t_mfn);
     }
 
-    free_xen_pagetable(rpt);
+    UNMAP_XEN_PAGETABLE_NEW(rpt);
+    free_xen_pagetable_new(rpt_mfn);
 
     /* Also zap the stub mapping for this CPU. */
     if ( stub_linear )
@@ -1134,7 +1148,7 @@  void __init smp_prepare_cpus(void)
     rc = setup_cpu_root_pgt(0);
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
-    if ( per_cpu(root_pgt, 0) )
+    if ( !mfn_eq(per_cpu(root_pgt_mfn, 0), INVALID_MFN) )
     {
         get_cpu_info()->pv_cr3 = 0;
 
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 80173eb4c3..12a10b270d 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -646,4 +646,6 @@  void free_xen_pagetable_new(mfn_t mfn);
 
 l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
 
+DECLARE_PER_CPU(mfn_t, root_pgt_mfn);
+
 #endif /* __ASM_X86_MM_H__ */
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index c6fc1987a1..68d1d82071 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -469,7 +469,7 @@  static inline void disable_each_ist(idt_entry_t *idt)
 extern idt_entry_t idt_table[];
 extern idt_entry_t *idt_tables[];
 
-DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
+DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 extern void write_ptbase(struct vcpu *v);