diff mbox series

[v3,7/7] mm/swap: Cache swap migration A/D bits support

Message ID 20220809220100.20033-8-peterx@redhat.com (mailing list archive)
State New
Headers show
Series mm: Remember a/d bits for migration entries | expand

Commit Message

Peter Xu Aug. 9, 2022, 10:01 p.m. UTC
Introduce a variable swap_migration_ad_supported to cache whether the arch
supports swap migration A/D bits.

One thing worth mentioning is that SWP_MIG_TOTAL_BITS internally
references another macro, MAX_PHYSMEM_BITS, which is a function call on
x86 (and a constant on all other archs).

It's safe to reference it in swapfile_init() because by the time we get
there we're already at initcall level 4, so we must already have
initialized 5-level pgtable for x86_64 (right after early_identify_cpu()
finishes).

- start_kernel
  - setup_arch
    - early_cpu_init
      - get_cpu_cap --> fetch from CPUID (including X86_FEATURE_LA57)
      - early_identify_cpu --> clear X86_FEATURE_LA57 (if early lvl5 not enabled (USE_EARLY_PGTABLE_L5))
  - arch_call_rest_init
    - rest_init
      - kernel_init
        - kernel_init_freeable
          - do_basic_setup
            - do_initcalls --> calls swapfile_init() (initcall level 4)

This should slightly speed up the handling of migration swap entries.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/swapfile.h | 1 +
 include/linux/swapops.h  | 7 +------
 mm/swapfile.c            | 8 ++++++++
 3 files changed, 10 insertions(+), 6 deletions(-)

Comments

Huang, Ying Aug. 10, 2022, 6:37 a.m. UTC | #1
Peter Xu <peterx@redhat.com> writes:

> Introduce a variable swap_migration_ad_supported to cache whether the arch
> supports swap migration A/D bits.
>
> Here one thing to mention is that SWP_MIG_TOTAL_BITS will internally
> reference the other macro MAX_PHYSMEM_BITS, which is a function call on
> x86 (constant on all the rest of archs).
>
> It's safe to reference it in swapfile_init() because when reaching here
> we're already during initcalls level 4 so we must have initialized 5-level
> pgtable for x86_64 (right after early_identify_cpu() finishes).
>
> - start_kernel
>   - setup_arch
>     - early_cpu_init
>       - get_cpu_cap --> fetch from CPUID (including X86_FEATURE_LA57)
>       - early_identify_cpu --> clear X86_FEATURE_LA57 (if early lvl5 not enabled (USE_EARLY_PGTABLE_L5))
>   - arch_call_rest_init
>     - rest_init
>       - kernel_init
>         - kernel_init_freeable
>           - do_basic_setup
>             - do_initcalls --> calls swapfile_init() (initcall level 4)
>
> This should slightly speed up the migration swap entry handlings.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  include/linux/swapfile.h | 1 +
>  include/linux/swapops.h  | 7 +------
>  mm/swapfile.c            | 8 ++++++++
>  3 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
> index 54078542134c..87ec5e2cdb02 100644
> --- a/include/linux/swapfile.h
> +++ b/include/linux/swapfile.h
> @@ -9,5 +9,6 @@
>  extern struct swap_info_struct *swap_info[];
>  extern unsigned long generic_max_swapfile_size(void);
>  extern unsigned long max_swapfile_size(void);
> +extern bool swap_migration_ad_supported;
>  
>  #endif /* _LINUX_SWAPFILE_H */
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 0e9579b90659..e6afc77c51ad 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -301,13 +301,8 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
>   */
>  static inline bool migration_entry_supports_ad(void)
>  {
> -	/*
> -	 * max_swapfile_size() returns the max supported swp-offset plus 1.
> -	 * We can support the migration A/D bits iff the pfn swap entry has
> -	 * the offset large enough to cover all of them (PFN, A & D bits).
> -	 */
>  #ifdef CONFIG_SWAP
> -	return max_swapfile_size() >= (1UL << SWP_MIG_TOTAL_BITS);
> +	return swap_migration_ad_supported;
>  #else  /* CONFIG_SWAP */
>  	return false;
>  #endif	/* CONFIG_SWAP */
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 794fa37bd0c3..c49cf25f0d08 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -64,6 +64,9 @@ EXPORT_SYMBOL_GPL(nr_swap_pages);
>  long total_swap_pages;
>  static int least_priority = -1;
>  static unsigned long swapfile_maximum_size;
> +#ifdef CONFIG_MIGRATION
> +bool swap_migration_ad_supported;
> +#endif	/* CONFIG_MIGRATION */
>  
>  static const char Bad_file[] = "Bad swap file entry ";
>  static const char Unused_file[] = "Unused swap file entry ";
> @@ -3685,6 +3688,11 @@ static int __init swapfile_init(void)
>  
>  	swapfile_maximum_size = arch_max_swapfile_size();
>  
> +#ifdef CONFIG_MIGRATION
> +	if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))
> +		swap_migration_ad_supported = true;
> +#endif	/* CONFIG_MIGRATION */
> +
>  	return 0;
>  }
>  subsys_initcall(swapfile_init);

I don't think it's necessary to add a variable for such a simple
function and it's not a super hot path.  But I don't have strong
opinions here.

Best Regards,
Huang, Ying
Peter Xu Aug. 10, 2022, 5:09 p.m. UTC | #2
On Wed, Aug 10, 2022 at 02:37:40PM +0800, Huang, Ying wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > Introduce a variable swap_migration_ad_supported to cache whether the arch
> > supports swap migration A/D bits.
> >
> > Here one thing to mention is that SWP_MIG_TOTAL_BITS will internally
> > reference the other macro MAX_PHYSMEM_BITS, which is a function call on
> > x86 (constant on all the rest of archs).
> >
> > It's safe to reference it in swapfile_init() because when reaching here
> > we're already during initcalls level 4 so we must have initialized 5-level
> > pgtable for x86_64 (right after early_identify_cpu() finishes).
> >
> > - start_kernel
> >   - setup_arch
> >     - early_cpu_init
> >       - get_cpu_cap --> fetch from CPUID (including X86_FEATURE_LA57)
> >       - early_identify_cpu --> clear X86_FEATURE_LA57 (if early lvl5 not enabled (USE_EARLY_PGTABLE_L5))
> >   - arch_call_rest_init
> >     - rest_init
> >       - kernel_init
> >         - kernel_init_freeable
> >           - do_basic_setup
> >             - do_initcalls --> calls swapfile_init() (initcall level 4)
> >
> > This should slightly speed up the migration swap entry handlings.
> >
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  include/linux/swapfile.h | 1 +
> >  include/linux/swapops.h  | 7 +------
> >  mm/swapfile.c            | 8 ++++++++
> >  3 files changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
> > index 54078542134c..87ec5e2cdb02 100644
> > --- a/include/linux/swapfile.h
> > +++ b/include/linux/swapfile.h
> > @@ -9,5 +9,6 @@
> >  extern struct swap_info_struct *swap_info[];
> >  extern unsigned long generic_max_swapfile_size(void);
> >  extern unsigned long max_swapfile_size(void);
> > +extern bool swap_migration_ad_supported;
> >  
> >  #endif /* _LINUX_SWAPFILE_H */
> > diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> > index 0e9579b90659..e6afc77c51ad 100644
> > --- a/include/linux/swapops.h
> > +++ b/include/linux/swapops.h
> > @@ -301,13 +301,8 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
> >   */
> >  static inline bool migration_entry_supports_ad(void)
> >  {
> > -	/*
> > -	 * max_swapfile_size() returns the max supported swp-offset plus 1.
> > -	 * We can support the migration A/D bits iff the pfn swap entry has
> > -	 * the offset large enough to cover all of them (PFN, A & D bits).
> > -	 */
> >  #ifdef CONFIG_SWAP
> > -	return max_swapfile_size() >= (1UL << SWP_MIG_TOTAL_BITS);
> > +	return swap_migration_ad_supported;
> >  #else  /* CONFIG_SWAP */
> >  	return false;
> >  #endif	/* CONFIG_SWAP */
> > diff --git a/mm/swapfile.c b/mm/swapfile.c
> > index 794fa37bd0c3..c49cf25f0d08 100644
> > --- a/mm/swapfile.c
> > +++ b/mm/swapfile.c
> > @@ -64,6 +64,9 @@ EXPORT_SYMBOL_GPL(nr_swap_pages);
> >  long total_swap_pages;
> >  static int least_priority = -1;
> >  static unsigned long swapfile_maximum_size;
> > +#ifdef CONFIG_MIGRATION
> > +bool swap_migration_ad_supported;
> > +#endif	/* CONFIG_MIGRATION */
> >  
> >  static const char Bad_file[] = "Bad swap file entry ";
> >  static const char Unused_file[] = "Unused swap file entry ";
> > @@ -3685,6 +3688,11 @@ static int __init swapfile_init(void)
> >  
> >  	swapfile_maximum_size = arch_max_swapfile_size();
> >  
> > +#ifdef CONFIG_MIGRATION
> > +	if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))
> > +		swap_migration_ad_supported = true;
> > +#endif	/* CONFIG_MIGRATION */
> > +
> >  	return 0;
> >  }
> >  subsys_initcall(swapfile_init);
> 
> I don't think it's necessary to add a variable for such a simple
> function and it's not a super hot path.  But I don't have strong
> opinions here.

Logically, referencing SWP_MIG_TOTAL_BITS requires evaluating
MAX_PHYSMEM_BITS, which in turn expands to:

# define MAX_PHYSMEM_BITS	(pgtable_l5_enabled() ? 52 : 46)

Then, since swapfile.c doesn't have USE_EARLY_PGTABLE_L5 defined, we get:

#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)

Then,

#define cpu_feature_enabled(bit)	\
	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))

I think LA57 shouldn't be in DISABLED_MASK_BIT_SET() at all; in our case
the relevant disable mask is:

#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
			 DISABLE_ENQCMD)

Here we should have:

#ifdef CONFIG_X86_5LEVEL
# define DISABLE_LA57	0
#else
# define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
#endif

So DISABLE_LA57 should be 0 when 5-level paging is enabled (true in my
case).  Then we really should end up in static_cpu_has().

I checked the generated code and, surprisingly, it's indeed fairly fast:

   (after fetching swapfile_maximum_size() and putting it into %rax; I'll
    change that into a variable soon..)
   0xffffffff83932e41 <+185>:   mov    $0x1,%edx
   0xffffffff83932e46 <+190>:   shl    $0x24,%rdx
   0xffffffff83932e4a <+194>:   xor    %r8d,%r8d
   0xffffffff83932e4d <+197>:   cmp    %rdx,%rax
   0xffffffff83932e50 <+200>:   jb     0xffffffff83932e59 <swapfile_init+209>
   0xffffffff83932e52 <+202>:   movb   $0x1,0xeab897(%rip)        # 0xffffffff847de6f0 <swap_migration_ad_supported>
   0xffffffff83932e59 <+209>:   mov    %r8d,%eax

Obviously, on my test host SWP_MIG_TOTAL_BITS is directly set to $0x24
(which reflects a 4-level pgtable), but frankly I cannot tell how it did
that without checking the boot cpu x86_capabilities flags..  I'm pretty
sure my kernel config has CONFIG_X86_5LEVEL=y.

It'd be great if anyone could point out why it can be optimized into a
constant, but even so I'm not confident that it'll be a constant on all
hosts, or whether static_cpu_has() will still consume some insns.

Since the change is fairly simple after the previous patch, I think it'd
be nice to keep it too.

Thanks,
diff mbox series

Patch

diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 54078542134c..87ec5e2cdb02 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -9,5 +9,6 @@ 
 extern struct swap_info_struct *swap_info[];
 extern unsigned long generic_max_swapfile_size(void);
 extern unsigned long max_swapfile_size(void);
+extern bool swap_migration_ad_supported;
 
 #endif /* _LINUX_SWAPFILE_H */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 0e9579b90659..e6afc77c51ad 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -301,13 +301,8 @@  static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
  */
 static inline bool migration_entry_supports_ad(void)
 {
-	/*
-	 * max_swapfile_size() returns the max supported swp-offset plus 1.
-	 * We can support the migration A/D bits iff the pfn swap entry has
-	 * the offset large enough to cover all of them (PFN, A & D bits).
-	 */
 #ifdef CONFIG_SWAP
-	return max_swapfile_size() >= (1UL << SWP_MIG_TOTAL_BITS);
+	return swap_migration_ad_supported;
 #else  /* CONFIG_SWAP */
 	return false;
 #endif	/* CONFIG_SWAP */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 794fa37bd0c3..c49cf25f0d08 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -64,6 +64,9 @@  EXPORT_SYMBOL_GPL(nr_swap_pages);
 long total_swap_pages;
 static int least_priority = -1;
 static unsigned long swapfile_maximum_size;
+#ifdef CONFIG_MIGRATION
+bool swap_migration_ad_supported;
+#endif	/* CONFIG_MIGRATION */
 
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
@@ -3685,6 +3688,11 @@  static int __init swapfile_init(void)
 
 	swapfile_maximum_size = arch_max_swapfile_size();
 
+#ifdef CONFIG_MIGRATION
+	if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))
+		swap_migration_ad_supported = true;
+#endif	/* CONFIG_MIGRATION */
+
 	return 0;
 }
 subsys_initcall(swapfile_init);