diff mbox series

[v2,07/18] x86/pv: update guest LDT mappings using the linear entries

Message ID 20250108142659.99490-8-roger.pau@citrix.com (mailing list archive)
State New
Headers show
Series x86: adventures in Address Space Isolation | expand

Commit Message

Roger Pau Monné Jan. 8, 2025, 2:26 p.m. UTC
The pv_map_ldt_shadow_page() and pv_destroy_ldt() functions rely on the L1
table(s) that contain such mappings being stashed in the domain structure, and
thus such mappings being modified by merely updating the required L1 entries.

Switch pv_map_ldt_shadow_page() to unconditionally use the recursive linear
mappings, as that logic is always called while the vCPU is running on the
current pCPU.

For pv_destroy_ldt() use the linear mappings if the vCPU is the one currently
running on the pCPU, otherwise use destroy_perdomain_mapping().

Note this requires keeping an array with the pages currently mapped at the LDT
area, as that allows dropping the extra taken page reference when removing the
mappings.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/include/asm/domain.h   |  2 ++
 xen/arch/x86/pv/descriptor-tables.c | 19 ++++++++++---------
 xen/arch/x86/pv/domain.c            |  4 ++++
 xen/arch/x86/pv/mm.c                |  3 ++-
 4 files changed, 18 insertions(+), 10 deletions(-)

Comments

Alejandro Vallejo Jan. 9, 2025, 2:34 p.m. UTC | #1
On Wed Jan 8, 2025 at 2:26 PM GMT, Roger Pau Monne wrote:
> The pv_map_ldt_shadow_page() and pv_destroy_ldt() functions rely on the L1
> table(s) that contain such mappings being stashed in the domain structure, and
> thus such mappings being modified by merely updating the require L1 entries.
>
> Switch pv_map_ldt_shadow_page() to unconditionally use the linear recursive, as
> that logic is always called while the vCPU is running on the current pCPU.
>
> For pv_destroy_ldt() use the linear mappings if the vCPU is the one currently
> running on the pCPU, otherwise use destroy_mappings().
>
> Note this requires keeping an array with the pages currently mapped at the LDT
> area, as that allows dropping the extra taken page reference when removing the
> mappings.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/include/asm/domain.h   |  2 ++
>  xen/arch/x86/pv/descriptor-tables.c | 19 ++++++++++---------
>  xen/arch/x86/pv/domain.c            |  4 ++++
>  xen/arch/x86/pv/mm.c                |  3 ++-
>  4 files changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index b79d6badd71c..b659cffc7f81 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -523,6 +523,8 @@ struct pv_vcpu
>      struct trap_info *trap_ctxt;
>  
>      unsigned long gdt_frames[FIRST_RESERVED_GDT_PAGE];
> +    /* Max LDT entries is 8192, so 8192 * 8 = 64KiB (16 pages). */
> +    mfn_t ldt_frames[16];
>      unsigned long ldt_base;
>      unsigned int gdt_ents, ldt_ents;
>  
> diff --git a/xen/arch/x86/pv/descriptor-tables.c b/xen/arch/x86/pv/descriptor-tables.c
> index 5a79f022ce13..95b598a4c0cf 100644
> --- a/xen/arch/x86/pv/descriptor-tables.c
> +++ b/xen/arch/x86/pv/descriptor-tables.c
> @@ -20,28 +20,29 @@
>   */
>  bool pv_destroy_ldt(struct vcpu *v)
>  {
> -    l1_pgentry_t *pl1e;
> +    const unsigned int nr_frames = ARRAY_SIZE(v->arch.pv.ldt_frames);
>      unsigned int i, mappings_dropped = 0;
> -    struct page_info *page;
>  
>      ASSERT(!in_irq());
>  
>      ASSERT(v == current || !vcpu_cpu_dirty(v));
>  
> -    pl1e = pv_ldt_ptes(v);
> +    destroy_perdomain_mapping(v, LDT_VIRT_START(v), nr_frames);
>  
> -    for ( i = 0; i < 16; i++ )
> +    for ( i = 0; i < nr_frames; i++ )

nit: While at this, can the "unsigned int" be moved here too?

>      {
> -        if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
> -            continue;
> +        mfn_t mfn = v->arch.pv.ldt_frames[i];
> +        struct page_info *page;
>  
> -        page = l1e_get_page(pl1e[i]);
> -        l1e_write(&pl1e[i], l1e_empty());
> -        mappings_dropped++;
> +        if ( mfn_eq(mfn, INVALID_MFN) )
> +            continue;

Can it really be disjoint? As in, why "continue" and not "break"? Not that it
matters in the slightest, and I prefer this form; but I'm curious.

>  
> +        v->arch.pv.ldt_frames[i] = INVALID_MFN;
> +        page = mfn_to_page(mfn);
>          ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
>          ASSERT_PAGE_IS_DOMAIN(page, v->domain);
>          put_page_and_type(page);
> +        mappings_dropped++;
>      }
>  
>      return mappings_dropped;
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 7e8bffaae9a0..32d7488cc186 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -303,6 +303,7 @@ void pv_vcpu_destroy(struct vcpu *v)
>  int pv_vcpu_initialise(struct vcpu *v)
>  {
>      struct domain *d = v->domain;
> +    unsigned int i;
>      int rc;
>  
>      ASSERT(!is_idle_domain(d));
> @@ -311,6 +312,9 @@ int pv_vcpu_initialise(struct vcpu *v)
>      if ( rc )
>          return rc;
>  
> +    for ( i = 0; i < ARRAY_SIZE(v->arch.pv.ldt_frames); i++ )
> +        v->arch.pv.ldt_frames[i] = INVALID_MFN;
> +

I think it makes more sense to move this earlier so ldt_frames[] is initialised
even if pv_vcpu_initialise() fails. It may be benign, but it looks like an
accident about to happen.

Also, nit: "unsigned int i"'s scope can be restricted to the loop itself.

  As in, "for ( unsigned int i =..."

>      BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
>                   PAGE_SIZE);
>      v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
> diff --git a/xen/arch/x86/pv/mm.c b/xen/arch/x86/pv/mm.c
> index 187f5f6a3e8c..4853e619f2a7 100644
> --- a/xen/arch/x86/pv/mm.c
> +++ b/xen/arch/x86/pv/mm.c
> @@ -86,7 +86,8 @@ bool pv_map_ldt_shadow_page(unsigned int offset)
>          return false;
>      }
>  
> -    pl1e = &pv_ldt_ptes(curr)[offset >> PAGE_SHIFT];
> +    curr->arch.pv.ldt_frames[offset >> PAGE_SHIFT] = page_to_mfn(page);
> +    pl1e = &__linear_l1_table[l1_linear_offset(LDT_VIRT_START(curr) + offset)];
>      l1e_add_flags(gl1e, _PAGE_RW);
>  
>      l1e_write(pl1e, gl1e);

Cheers,
Alejandro
diff mbox series

Patch

diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index b79d6badd71c..b659cffc7f81 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -523,6 +523,8 @@  struct pv_vcpu
     struct trap_info *trap_ctxt;
 
     unsigned long gdt_frames[FIRST_RESERVED_GDT_PAGE];
+    /* Max LDT entries is 8192, so 8192 * 8 = 64KiB (16 pages). */
+    mfn_t ldt_frames[16];
     unsigned long ldt_base;
     unsigned int gdt_ents, ldt_ents;
 
diff --git a/xen/arch/x86/pv/descriptor-tables.c b/xen/arch/x86/pv/descriptor-tables.c
index 5a79f022ce13..95b598a4c0cf 100644
--- a/xen/arch/x86/pv/descriptor-tables.c
+++ b/xen/arch/x86/pv/descriptor-tables.c
@@ -20,28 +20,29 @@ 
  */
 bool pv_destroy_ldt(struct vcpu *v)
 {
-    l1_pgentry_t *pl1e;
+    const unsigned int nr_frames = ARRAY_SIZE(v->arch.pv.ldt_frames);
     unsigned int i, mappings_dropped = 0;
-    struct page_info *page;
 
     ASSERT(!in_irq());
 
     ASSERT(v == current || !vcpu_cpu_dirty(v));
 
-    pl1e = pv_ldt_ptes(v);
+    destroy_perdomain_mapping(v, LDT_VIRT_START(v), nr_frames);
 
-    for ( i = 0; i < 16; i++ )
+    for ( i = 0; i < nr_frames; i++ )
     {
-        if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
-            continue;
+        mfn_t mfn = v->arch.pv.ldt_frames[i];
+        struct page_info *page;
 
-        page = l1e_get_page(pl1e[i]);
-        l1e_write(&pl1e[i], l1e_empty());
-        mappings_dropped++;
+        if ( mfn_eq(mfn, INVALID_MFN) )
+            continue;
 
+        v->arch.pv.ldt_frames[i] = INVALID_MFN;
+        page = mfn_to_page(mfn);
         ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
         ASSERT_PAGE_IS_DOMAIN(page, v->domain);
         put_page_and_type(page);
+        mappings_dropped++;
     }
 
     return mappings_dropped;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 7e8bffaae9a0..32d7488cc186 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -303,6 +303,7 @@  void pv_vcpu_destroy(struct vcpu *v)
 int pv_vcpu_initialise(struct vcpu *v)
 {
     struct domain *d = v->domain;
+    unsigned int i;
     int rc;
 
     ASSERT(!is_idle_domain(d));
@@ -311,6 +312,9 @@  int pv_vcpu_initialise(struct vcpu *v)
     if ( rc )
         return rc;
 
+    for ( i = 0; i < ARRAY_SIZE(v->arch.pv.ldt_frames); i++ )
+        v->arch.pv.ldt_frames[i] = INVALID_MFN;
+
     BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
                  PAGE_SIZE);
     v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
diff --git a/xen/arch/x86/pv/mm.c b/xen/arch/x86/pv/mm.c
index 187f5f6a3e8c..4853e619f2a7 100644
--- a/xen/arch/x86/pv/mm.c
+++ b/xen/arch/x86/pv/mm.c
@@ -86,7 +86,8 @@  bool pv_map_ldt_shadow_page(unsigned int offset)
         return false;
     }
 
-    pl1e = &pv_ldt_ptes(curr)[offset >> PAGE_SHIFT];
+    curr->arch.pv.ldt_frames[offset >> PAGE_SHIFT] = page_to_mfn(page);
+    pl1e = &__linear_l1_table[l1_linear_offset(LDT_VIRT_START(curr) + offset)];
     l1e_add_flags(gl1e, _PAGE_RW);
 
     l1e_write(pl1e, gl1e);