| Message ID | 20220309021230.721028-2-yuzhao@google.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Multi-Gen LRU Framework |
On Wed, Mar 9, 2022 at 3:47 PM Yu Zhao <yuzhao@google.com> wrote:
>
> Some architectures automatically set the accessed bit in PTEs, e.g.,
> x86 and arm64 v8.2. On architectures that do not have this capability,
> clearing the accessed bit in a PTE usually triggers a page fault
> following the TLB miss of this PTE (to emulate the accessed bit).
>
> Being aware of this capability can help make better decisions, e.g.,
> whether to spread the work out over a period of time to reduce bursty
> page faults when trying to clear the accessed bit in many PTEs.
>
> Note that theoretically this capability can be unreliable, e.g.,
> hotplugged CPUs might be different from builtin ones. Therefore it
> should not be used in architecture-independent code that involves
> correctness, e.g., to determine whether TLB flushes are required (in
> combination with the accessed bit).
>
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> Acked-by: Brian Geffon <bgeffon@google.com>
> Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
> Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
> Acked-by: Steven Barrett <steven@liquorix.net>
> Acked-by: Suleiman Souhlal <suleiman@google.com>
> Acked-by: Will Deacon <will@kernel.org>
> Tested-by: Daniel Byrne <djbyrne@mtu.edu>
> Tested-by: Donald Carr <d@chaos-reins.com>
> Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
> Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
> Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
> Tested-by: Sofia Trinh <sofia.trinh@edi.works>
> Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---

Reviewed-by: Barry Song <baohua@kernel.org>

i guess arch_has_hw_pte_young() isn't called that often in either
mm/memory.c or mm/vmscan.c.
Otherwise, moving to a static key might help. Is it?

>  arch/arm64/include/asm/pgtable.h | 14 ++------------
>  arch/x86/include/asm/pgtable.h   |  6 +++---
>  include/linux/pgtable.h          | 13 +++++++++++++
>  mm/memory.c                      | 14 +-------------
>  4 files changed, 19 insertions(+), 28 deletions(-)
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index c4ba047a82d2..990358eca359 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -999,23 +999,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>  * page after fork() + CoW for pfn mappings. We don't always have a
>  * hardware-managed access flag on arm64.
>  */
> -static inline bool arch_faults_on_old_pte(void)
> -{
> -       WARN_ON(preemptible());
> -
> -       return !cpu_has_hw_af();
> -}
> -#define arch_faults_on_old_pte arch_faults_on_old_pte
> +#define arch_has_hw_pte_young cpu_has_hw_af
>
>  /*
>   * Experimentally, it's cheap to set the access flag in hardware and we
>   * benefit from prefaulting mappings as 'old' to start with.
>   */
> -static inline bool arch_wants_old_prefaulted_pte(void)
> -{
> -       return !arch_faults_on_old_pte();
> -}
> -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
> +#define arch_wants_old_prefaulted_pte cpu_has_hw_af
>
>  static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
>  {
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 8a9432fb3802..60b6ce45c2e3 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -1423,10 +1423,10 @@ static inline bool arch_has_pfn_modify_check(void)
>         return boot_cpu_has_bug(X86_BUG_L1TF);
>  }
>
> -#define arch_faults_on_old_pte arch_faults_on_old_pte
> -static inline bool arch_faults_on_old_pte(void)
> +#define arch_has_hw_pte_young arch_has_hw_pte_young
> +static inline bool arch_has_hw_pte_young(void)
>  {
> -       return false;
> +       return true;
>  }
>
>  #endif /* __ASSEMBLY__ */
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index f4f4077b97aa..79f64dcff07d 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -259,6 +259,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>  #endif
>
> +#ifndef arch_has_hw_pte_young
> +/*
> + * Return whether the accessed bit is supported on the local CPU.
> + *
> + * This stub assumes accessing through an old PTE triggers a page fault.
> + * Architectures that automatically set the access bit should overwrite it.
> + */
> +static inline bool arch_has_hw_pte_young(void)
> +{
> +       return false;
> +}
> +#endif
> +
>  #ifndef __HAVE_ARCH_PTEP_CLEAR
>  static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
>                               pte_t *ptep)
> diff --git a/mm/memory.c b/mm/memory.c
> index c125c4969913..a7379196a47e 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -122,18 +122,6 @@ int randomize_va_space __read_mostly =
>  2;
>  #endif
>
> -#ifndef arch_faults_on_old_pte
> -static inline bool arch_faults_on_old_pte(void)
> -{
> -       /*
> -        * Those arches which don't have hw access flag feature need to
> -        * implement their own helper. By default, "true" means pagefault
> -        * will be hit on old pte.
> -        */
> -       return true;
> -}
> -#endif
> -
>  #ifndef arch_wants_old_prefaulted_pte
>  static inline bool arch_wants_old_prefaulted_pte(void)
>  {
> @@ -2778,7 +2766,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
>          * On architectures with software "accessed" bits, we would
>          * take a double page fault, so mark it accessed here.
>          */
> -       if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
> +       if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
>                 pte_t entry;
>
>                 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
> --
> 2.35.1.616.g0bdcbb4464-goog
>

Thanks
Barry
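For context, here is a minimal sketch (not part of this series) of how architecture-independent code might act on arch_has_hw_pte_young() in the way the commit message describes, clearing accessed bits in large batches when hardware sets them and in smaller batches when each cleared bit costs a page fault; walk_and_clear_young() and the batch sizes are hypothetical placeholders:

```c
#include <linux/mm_types.h>
#include <linux/pgtable.h>

/*
 * Hypothetical caller, for illustration only: when hardware sets the
 * accessed bit, clearing many PTEs at once is cheap; when the bit is
 * emulated through page faults, smaller batches spread the resulting
 * faults out over time. walk_and_clear_young() and the batch sizes
 * are made up for this sketch.
 */
static void age_mm_ptes(struct mm_struct *mm)
{
	unsigned long batch = arch_has_hw_pte_young() ? 4096 : 64;

	walk_and_clear_young(mm, batch);	/* hypothetical helper */
}
```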
On Fri, Mar 11, 2022 at 3:55 AM Barry Song <21cnbao@gmail.com> wrote:
>
> On Wed, Mar 9, 2022 at 3:47 PM Yu Zhao <yuzhao@google.com> wrote:
> >
> > Some architectures automatically set the accessed bit in PTEs, e.g.,
> > x86 and arm64 v8.2. On architectures that do not have this capability,
> > clearing the accessed bit in a PTE usually triggers a page fault
> > following the TLB miss of this PTE (to emulate the accessed bit).
> >
> > Being aware of this capability can help make better decisions, e.g.,
> > whether to spread the work out over a period of time to reduce bursty
> > page faults when trying to clear the accessed bit in many PTEs.
> >
> > Note that theoretically this capability can be unreliable, e.g.,
> > hotplugged CPUs might be different from builtin ones. Therefore it
> > should not be used in architecture-independent code that involves
> > correctness, e.g., to determine whether TLB flushes are required (in
> > combination with the accessed bit).
> >
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > Acked-by: Brian Geffon <bgeffon@google.com>
> > Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
> > Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
> > Acked-by: Steven Barrett <steven@liquorix.net>
> > Acked-by: Suleiman Souhlal <suleiman@google.com>
> > Acked-by: Will Deacon <will@kernel.org>
> > Tested-by: Daniel Byrne <djbyrne@mtu.edu>
> > Tested-by: Donald Carr <d@chaos-reins.com>
> > Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
> > Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
> > Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
> > Tested-by: Sofia Trinh <sofia.trinh@edi.works>
> > Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> > ---
>
> Reviewed-by: Barry Song <baohua@kernel.org>

Thanks.

> i guess arch_has_hw_pte_young() isn't called that often in either
> mm/memory.c or mm/vmscan.c.
> Otherwise, moving to a static key might help. Is it?

MRS shouldn't be slower than either branch of a static key. With a
static key, we only can optimize one of the two cases. There is a
*theoretical* problem with MRS: ARM specs don't prohibit a physical
CPU to support both cases (on different logical CPUs).
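For reference, the static-key alternative Barry raises could look roughly like the sketch below, using the kernel's jump-label API; the key name and the init hook are assumptions for illustration only, and, as Yu points out, patching the branch this way can only make one of the two cases the fast path:

```c
#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/pgtable.h>

/* Hypothetical key: enabled once at boot if the accessed bit is set in hardware. */
static DEFINE_STATIC_KEY_FALSE(hw_pte_young_key);

static int __init hw_pte_young_init(void)
{
	/* Assumes all CPUs agree; the commit message notes hotplugged CPUs might differ. */
	if (arch_has_hw_pte_young())
		static_branch_enable(&hw_pte_young_key);
	return 0;
}
early_initcall(hw_pte_young_init);

static inline bool hw_pte_young_fast_path(void)
{
	/* Jump label: the runtime check becomes a patched branch instead of a load. */
	return static_branch_likely(&hw_pte_young_key);
}
```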