
[v2,3/5] mm/pdx: Standardize region validation wrt pdx compression

Message ID 20230728075903.7838-4-alejandro.vallejo@cloud.com
State Superseded
Series Make PDX compression optional

Commit Message

Alejandro Vallejo July 28, 2023, 7:59 a.m. UTC
Regions must be occasionally validated for pdx compression validity. That
is, whether any of the machine addresses spanning the region have a bit set
in the pdx "hole" (which is expected to always contain zeroes). There are
a few such tests through the code, and they all check for different things.

This patch replaces all such occurrences with a call to a centralized
function that checks a region for validity.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
v2:
  * s/ocurrences/occurrences in commit message (Julien)
  * Use pfn_to_paddr()/paddr_to_pfn() (Julien)
  * Use (paddr_t,unsigned long) in pdx_is_region_compressible() (Julien, Jan)
---
 xen/arch/x86/x86_64/mm.c |  7 +++++--
 xen/common/efi/boot.c    | 13 ++++++++++---
 xen/common/pdx.c         | 10 ++++++++--
 xen/include/xen/pdx.h    |  9 +++++++++
 4 files changed, 32 insertions(+), 7 deletions(-)
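
By way of background (an illustrative example, not part of the submitted
patch): pdx compression elides a run of page-frame-number bits that are zero
for every RAM range on the machine, and a region is only representable if
none of its frames sets a bit in that elided "hole". Assuming a hypothetical
2-bit hole at pfn bits 16-17 and the pfn_to_pdx() formula from
xen/include/xen/pdx.h:

    /*
     * Hypothetical setup (masks made up for illustration):
     *
     *   pfn_hole_mask       = 0x30000    bits elided by compression
     *   pfn_pdx_bottom_mask = 0x0ffff    pfn bits below the hole
     *   pfn_top_mask        = ~0x3ffff   pfn bits above the hole
     *   pfn_pdx_hole_shift  = 2          width of the hole
     *
     * pfn_to_pdx(0x40123) = (0x40123 & 0x0ffff) | ((0x40123 & ~0x3ffff) >> 2)
     *                     = 0x00123 | 0x10000
     *                     = 0x10123
     *
     * A pfn such as 0x12345, by contrast, has bit 16 set, i.e. it falls
     * inside the hole: no pdx can represent it, so any region containing it
     * must fail the validation this patch centralises.
     */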

Comments

Julien Grall July 28, 2023, 4:19 p.m. UTC | #1
Hi,

On 28/07/2023 08:59, Alejandro Vallejo wrote:
> Regions must be occasionally validated for pdx compression validity. That
> is, whether any of the machine addresses spanning the region have a bit set
> in the pdx "hole" (which is expected to always contain zeroes). There are
> a few such tests through the code, and they all check for different things.
> 
> This patch replaces all such occurrences with a call to a centralized
> function that checks a region for validity.
> 
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>

Reviewed-by: Julien Grall <jgrall@amazon.com>

Cheers,
Jan Beulich July 31, 2023, 3:27 p.m. UTC | #2
On 28.07.2023 09:59, Alejandro Vallejo wrote:
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -1159,6 +1159,9 @@ static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
>  {
>      unsigned long s, e, length, sidx, eidx;
>  
> +    paddr_t mem_base = pfn_to_paddr(spfn);
> +    unsigned long mem_npages = epfn - spfn;
> +
>      if ( (spfn >= epfn) )
>          return 0;

While occasionally groups of declarations indeed want separating, the
rule of thumb is that the first blank line after declarations separates
them from statements. I don't see reason here to diverge from this.
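
That is, roughly the following layout (a sketch only, reusing the hunk above):

    static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
    {
        /* All declarations grouped together ... */
        unsigned long s, e, length, sidx, eidx;
        paddr_t mem_base = pfn_to_paddr(spfn);
        unsigned long mem_npages = epfn - spfn;

        /* ... with the single blank line above separating them from statements. */
        if ( (spfn >= epfn) )
            return 0;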

> @@ -1660,6 +1663,8 @@ static bool __init cf_check rt_range_valid(unsigned long smfn, unsigned long emf
>  
>  void __init efi_init_memory(void)
>  {
> +    paddr_t mem_base;
> +    unsigned long mem_npages;

Why in the outermost scope when ...

> @@ -1732,6 +1737,9 @@ void __init efi_init_memory(void)
>          smfn = PFN_DOWN(desc->PhysicalStart);
>          emfn = PFN_UP(desc->PhysicalStart + len);
>  
> +        mem_base = pfn_to_paddr(smfn);
> +        mem_npages = emfn - smfn;
> +
>          if ( desc->Attribute & EFI_MEMORY_WB )
>              prot |= _PAGE_WB;
>          else if ( desc->Attribute & EFI_MEMORY_WT )
> @@ -1759,8 +1767,7 @@ void __init efi_init_memory(void)
>              prot |= _PAGE_NX;
>  
>          if ( pfn_to_pdx(emfn - 1) < (DIRECTMAP_SIZE >> PAGE_SHIFT) &&
> -             !(smfn & pfn_hole_mask) &&
> -             !((smfn ^ (emfn - 1)) & ~pfn_pdx_bottom_mask) )
> +             pdx_is_region_compressible(mem_base, mem_npages))
>          {
>              if ( (unsigned long)mfn_to_virt(emfn - 1) >= HYPERVISOR_VIRT_END )
>                  prot &= ~_PAGE_GLOBAL;

... you use the variables only in an inner one?

> --- a/xen/common/pdx.c
> +++ b/xen/common/pdx.c
> @@ -88,7 +88,7 @@ bool __mfn_valid(unsigned long mfn)
>  }
>  
>  /* Sets all bits from the most-significant 1-bit down to the LSB */
> -static uint64_t __init fill_mask(uint64_t mask)
> +static uint64_t fill_mask(uint64_t mask)
>  {
>      while (mask & (mask + 1))
>          mask |= mask + 1;

I see why you want __init dropped here, but the function wasn't written
for "common use" and hence may want improving first when intended for
more frequent (post-init) use as well. Then again I wonder why original
checking all got away without using this function ...
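
One possible shape of such an improvement (a sketch only, nothing the thread
settled on) is to replace the data-dependent loop with a fixed number of
shift/or steps, which produces the same result for every input:

    /* Sets all bits from the most-significant 1-bit down to the LSB */
    static uint64_t fill_mask(uint64_t mask)
    {
        mask |= mask >> 1;
        mask |= mask >> 2;
        mask |= mask >> 4;
        mask |= mask >> 8;
        mask |= mask >> 16;
        mask |= mask >> 32;

        return mask;
    }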

Jan
Alejandro Vallejo Aug. 7, 2023, 4:50 p.m. UTC | #3
On Mon, Jul 31, 2023 at 05:27:22PM +0200, Jan Beulich wrote:
> On 28.07.2023 09:59, Alejandro Vallejo wrote:
> > --- a/xen/arch/x86/x86_64/mm.c
> > +++ b/xen/arch/x86/x86_64/mm.c
> > @@ -1159,6 +1159,9 @@ static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
> >  {
> >      unsigned long s, e, length, sidx, eidx;
> >  
> > +    paddr_t mem_base = pfn_to_paddr(spfn);
> > +    unsigned long mem_npages = epfn - spfn;
> > +
> >      if ( (spfn >= epfn) )
> >          return 0;
> 
> While occasionally groups of declarations indeed want separating, the
> rule of thumb is that the first blank line after declarations separates
> them from statements. I don't see reason here to diverge from this.
Ack.

> 
> > @@ -1660,6 +1663,8 @@ static bool __init cf_check rt_range_valid(unsigned long smfn, unsigned long emf
> >  
> >  void __init efi_init_memory(void)
> >  {
> > +    paddr_t mem_base;
> > +    unsigned long mem_npages;
> 
> Why in the outermost scope when ...
> 
> > @@ -1732,6 +1737,9 @@ void __init efi_init_memory(void)
> >          smfn = PFN_DOWN(desc->PhysicalStart);
> >          emfn = PFN_UP(desc->PhysicalStart + len);
> >  
> > +        mem_base = pfn_to_paddr(smfn);
> > +        mem_npages = emfn - smfn;
> > +
> >          if ( desc->Attribute & EFI_MEMORY_WB )
> >              prot |= _PAGE_WB;
> >          else if ( desc->Attribute & EFI_MEMORY_WT )
> > @@ -1759,8 +1767,7 @@ void __init efi_init_memory(void)
> >              prot |= _PAGE_NX;
> >  
> >          if ( pfn_to_pdx(emfn - 1) < (DIRECTMAP_SIZE >> PAGE_SHIFT) &&
> > -             !(smfn & pfn_hole_mask) &&
> > -             !((smfn ^ (emfn - 1)) & ~pfn_pdx_bottom_mask) )
> > +             pdx_is_region_compressible(mem_base, mem_npages))
> >          {
> >              if ( (unsigned long)mfn_to_virt(emfn - 1) >= HYPERVISOR_VIRT_END )
> >                  prot &= ~_PAGE_GLOBAL;
> 
> ... you use the variables only in an inner one?
No good reason. I defaulted to the top of the function because C90 was
picky about declaring midway through a scope and I didn't consider going to
the top of the scope instead. Ack.

> 
> > --- a/xen/common/pdx.c
> > +++ b/xen/common/pdx.c
> > @@ -88,7 +88,7 @@ bool __mfn_valid(unsigned long mfn)
> >  }
> >  
> >  /* Sets all bits from the most-significant 1-bit down to the LSB */
> > -static uint64_t __init fill_mask(uint64_t mask)
> > +static uint64_t fill_mask(uint64_t mask)
> >  {
> >      while (mask & (mask + 1))
> >          mask |= mask + 1;
> 
> I see why you want __init dropped here, but the function wasn't written
> for "common use" and hence may want improving first when intended for
> more frequent (post-init) use as well.
It's not so much that I want it as that I need it. mem_hotadd_check()
uses it and it's not part of the init process. That's the only non-init use
of it, and it hardly qualifies as "common". IMO, possible micro-optimisations
don't matter a whole lot for that single caller.

> Then again I wonder why original
> checking all got away without using this function ...
> 
> Jan
My guess is that most real systems don't exercise the corner cases, and so
the code was just silently broken, passing the checks all the time. I could
not find an explanation for the OR check in mem_hotadd_check(), so I just
attributed it to a bug introduced out of unfamiliarity with pdx.
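
For a concrete, hypothetical case in which the old OR check is too weak,
assume a single hole bit at pfn bit 16:

    /*
     * Hypothetical: pfn_hole_mask has only bit 16 set.
     *
     *   spfn = 0x00000, epfn = 0x20000 (exclusive end)
     *
     * Old check:  (spfn | epfn) & pfn_hole_mask
     *           = 0x20000 & 0x10000 = 0           -> region accepted
     *
     * yet every pfn in 0x10000..0x1ffff lies inside [spfn, epfn) and has
     * the hole bit set. The new check rejects the region, since
     * pdx_region_mask(0, 0x20000 << PAGE_SHIFT) = 0x1fffffff includes
     * machine-address bit 28 (the hole bit shifted up by PAGE_SHIFT),
     * which intersects ~ma_va_bottom_mask.
     */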

Thanks,
Alejandro

Patch

diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 60db439af3..652e787934 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1159,6 +1159,9 @@  static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
 {
     unsigned long s, e, length, sidx, eidx;
 
+    paddr_t mem_base = pfn_to_paddr(spfn);
+    unsigned long mem_npages = epfn - spfn;
+
     if ( (spfn >= epfn) )
         return 0;
 
@@ -1168,7 +1171,7 @@  static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
     if ( (spfn | epfn) & ((1UL << PAGETABLE_ORDER) - 1) )
         return 0;
 
-    if ( (spfn | epfn) & pfn_hole_mask )
+    if ( !pdx_is_region_compressible(mem_base, mem_npages) )
         return 0;
 
     /* Make sure the new range is not present now */
@@ -1207,7 +1210,7 @@  static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
 
     length += (e - s) * sizeof(struct page_info);
 
-    if ((length >> PAGE_SHIFT) > (epfn - spfn))
+    if ((length >> PAGE_SHIFT) > mem_npages)
         return 0;
 
     return 1;
diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
index 79a654af69..52a7239389 100644
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -14,6 +14,7 @@ 
 #include <xen/multiboot.h>
 #include <xen/param.h>
 #include <xen/pci_regs.h>
+#include <xen/pdx.h>
 #include <xen/pfn.h>
 #if EFI_PAGE_SIZE != PAGE_SIZE
 # error Cannot use xen/pfn.h here!
@@ -1645,9 +1646,11 @@  static __init void copy_mapping(unsigned long mfn, unsigned long end,
 
 static bool __init cf_check ram_range_valid(unsigned long smfn, unsigned long emfn)
 {
+    paddr_t ram_base = pfn_to_paddr(smfn);
+    unsigned long ram_npages = emfn - smfn;
     unsigned long sz = pfn_to_pdx(emfn - 1) / PDX_GROUP_COUNT + 1;
 
-    return !(smfn & pfn_hole_mask) &&
+    return pdx_is_region_compressible(ram_base, ram_npages) &&
            find_next_bit(pdx_group_valid, sz,
                          pfn_to_pdx(smfn) / PDX_GROUP_COUNT) < sz;
 }
@@ -1660,6 +1663,8 @@  static bool __init cf_check rt_range_valid(unsigned long smfn, unsigned long emf
 
 void __init efi_init_memory(void)
 {
+    paddr_t mem_base;
+    unsigned long mem_npages;
     unsigned int i;
     l4_pgentry_t *efi_l4t;
     struct rt_extra {
@@ -1732,6 +1737,9 @@  void __init efi_init_memory(void)
         smfn = PFN_DOWN(desc->PhysicalStart);
         emfn = PFN_UP(desc->PhysicalStart + len);
 
+        mem_base = pfn_to_paddr(smfn);
+        mem_npages = emfn - smfn;
+
         if ( desc->Attribute & EFI_MEMORY_WB )
             prot |= _PAGE_WB;
         else if ( desc->Attribute & EFI_MEMORY_WT )
@@ -1759,8 +1767,7 @@  void __init efi_init_memory(void)
             prot |= _PAGE_NX;
 
         if ( pfn_to_pdx(emfn - 1) < (DIRECTMAP_SIZE >> PAGE_SHIFT) &&
-             !(smfn & pfn_hole_mask) &&
-             !((smfn ^ (emfn - 1)) & ~pfn_pdx_bottom_mask) )
+             pdx_is_region_compressible(mem_base, mem_npages))
         {
             if ( (unsigned long)mfn_to_virt(emfn - 1) >= HYPERVISOR_VIRT_END )
                 prot &= ~_PAGE_GLOBAL;
diff --git a/xen/common/pdx.c b/xen/common/pdx.c
index 99d4a90a50..3c88ceeb9c 100644
--- a/xen/common/pdx.c
+++ b/xen/common/pdx.c
@@ -88,7 +88,7 @@  bool __mfn_valid(unsigned long mfn)
 }
 
 /* Sets all bits from the most-significant 1-bit down to the LSB */
-static uint64_t __init fill_mask(uint64_t mask)
+static uint64_t fill_mask(uint64_t mask)
 {
     while (mask & (mask + 1))
         mask |= mask + 1;
@@ -96,6 +96,12 @@  static uint64_t __init fill_mask(uint64_t mask)
     return mask;
 }
 
+bool pdx_is_region_compressible(paddr_t base, unsigned long npages)
+{
+    return !(paddr_to_pfn(base) & pfn_hole_mask) &&
+           !(pdx_region_mask(base, npages * PAGE_SIZE) & ~ma_va_bottom_mask);
+}
+
 /* We don't want to compress the low MAX_ORDER bits of the addresses. */
 uint64_t __init pdx_init_mask(uint64_t base_addr)
 {
@@ -103,7 +109,7 @@  uint64_t __init pdx_init_mask(uint64_t base_addr)
                          (uint64_t)1 << (MAX_ORDER + PAGE_SHIFT)) - 1);
 }
 
-uint64_t __init pdx_region_mask(uint64_t base, uint64_t len)
+uint64_t pdx_region_mask(uint64_t base, uint64_t len)
 {
     /*
      * We say a bit "moves" in a range if there exist 2 addresses in that
diff --git a/xen/include/xen/pdx.h b/xen/include/xen/pdx.h
index d96f03d6e6..8c6aec2aea 100644
--- a/xen/include/xen/pdx.h
+++ b/xen/include/xen/pdx.h
@@ -79,6 +79,15 @@  extern unsigned long pfn_top_mask, ma_top_mask;
                          (sizeof(*frame_table) & -sizeof(*frame_table)))
 extern unsigned long pdx_group_valid[];
 
+/**
+ * Validate a region's compatibility with the current compression runtime
+ *
+ * @param base Base address of the region
+ * @param npages Number of PAGE_SIZE-sized pages in the region
+ * @return True iff the region can be used with the current compression
+ */
+bool pdx_is_region_compressible(paddr_t base, unsigned long npages);
+
 /**
  * Calculates a mask covering "moving" bits of all addresses of a region
  *