
[v6,1/9] mm: x86, arm64: add arch_has_hw_pte_young()

Message ID 20220104202227.2903605-2-yuzhao@google.com (mailing list archive)
State New, archived
Series Multigenerational LRU Framework

Commit Message

Yu Zhao Jan. 4, 2022, 8:22 p.m. UTC
Some architectures automatically set the accessed bit in PTEs, e.g.,
x86 and arm64 v8.2. On architectures that don't have this capability,
clearing the accessed bit in a PTE usually triggers a page fault
following the TLB miss of this PTE.

Being aware of this capability can help make better decisions, e.g.,
whether to spread the work out over a period of time to avoid bursty
page faults when trying to clear the accessed bit in a large number of
PTEs.

Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
---
 arch/arm64/include/asm/cpufeature.h |  5 +++++
 arch/arm64/include/asm/pgtable.h    | 13 ++++++++-----
 arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
 arch/arm64/tools/cpucaps            |  1 +
 arch/x86/include/asm/pgtable.h      |  6 +++---
 include/linux/pgtable.h             | 13 +++++++++++++
 mm/memory.c                         | 14 +-------------
 7 files changed, 50 insertions(+), 21 deletions(-)
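
To illustrate the kind of decision the commit message alludes to, here is a
minimal, hypothetical sketch (not part of this series; the helper name and the
1/8 batching factor are made up) of a scanner that uses the system-wide check
to decide whether to clear the accessed bit in a large number of PTEs in one
pass or to spread the work over several passes:

  static unsigned long ptes_to_scan_per_pass(unsigned long total)
  {
	/*
	 * System-wide check as introduced by this patch; the bool argument
	 * is dropped later in this thread. When hardware sets the accessed
	 * bit, clearing many PTEs doesn't lead to a burst of page faults,
	 * so everything can be done in one pass.
	 */
	if (arch_has_hw_pte_young(false))
		return total;

	/* Otherwise spread the resulting page faults out over time. */
	return max(total / 8, 1UL);
  }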

Comments

Will Deacon Jan. 5, 2022, 10:45 a.m. UTC | #1
On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> Some architectures automatically set the accessed bit in PTEs, e.g.,
> x86 and arm64 v8.2. On architectures that don't have this capability,
> clearing the accessed bit in a PTE usually triggers a page fault
> following the TLB miss of this PTE.
> 
> Being aware of this capability can help make better decisions, e.g.,
> whether to spread the work out over a period of time to avoid bursty
> page faults when trying to clear the accessed bit in a large number of
> PTEs.
> 
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
> ---
>  arch/arm64/include/asm/cpufeature.h |  5 +++++
>  arch/arm64/include/asm/pgtable.h    | 13 ++++++++-----
>  arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
>  arch/arm64/tools/cpucaps            |  1 +
>  arch/x86/include/asm/pgtable.h      |  6 +++---
>  include/linux/pgtable.h             | 13 +++++++++++++
>  mm/memory.c                         | 14 +-------------
>  7 files changed, 50 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index ef6be92b1921..99518b4b2a9e 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -779,6 +779,11 @@ static inline bool system_supports_tlb_range(void)
>  		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
>  }
>  
> +static inline bool system_has_hw_af(void)
> +{
> +	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
> +}
> +
>  extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
>  
>  static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index c4ba047a82d2..e736f47436c7 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -999,13 +999,16 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>   * page after fork() + CoW for pfn mappings. We don't always have a
>   * hardware-managed access flag on arm64.
>   */
> -static inline bool arch_faults_on_old_pte(void)
> +static inline bool arch_has_hw_pte_young(bool local)
>  {
> -	WARN_ON(preemptible());
> +	if (local) {
> +		WARN_ON(preemptible());
> +		return cpu_has_hw_af();
> +	}
>  
> -	return !cpu_has_hw_af();
> +	return system_has_hw_af();
>  }
> -#define arch_faults_on_old_pte		arch_faults_on_old_pte
> +#define arch_has_hw_pte_young		arch_has_hw_pte_young
>  
>  /*
>   * Experimentally, it's cheap to set the access flag in hardware and we
> @@ -1013,7 +1016,7 @@ static inline bool arch_faults_on_old_pte(void)
>   */
>  static inline bool arch_wants_old_prefaulted_pte(void)
>  {
> -	return !arch_faults_on_old_pte();
> +	return arch_has_hw_pte_young(true);
>  }
>  #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
>  
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 6f3e677d88f1..5bb553ee2c0e 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2171,6 +2171,25 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_hw_dbm,
>  		.cpu_enable = cpu_enable_hw_dbm,
>  	},
> +	{
> +		/*
> +		 * __cpu_setup always enables this capability. But if the boot
> +		 * CPU has it and a late CPU doesn't, the absent
> +		 * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU will prevent this late CPU
> +		 * from going online. There is no known hardware that does this,
> +		 * nor any obvious reason to design hardware that works that way,
> +		 * so there is no point in leaving the door open here. If the need
> +		 * arises, a new weak system feature flag should do the trick.
> +		 */
> +		.desc = "Hardware update of the Access flag",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_HW_AF,
> +		.sys_reg = SYS_ID_AA64MMFR1_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
> +		.min_field_value = 1,
> +		.matches = has_cpuid_feature,
> +	},
>  #endif
>  	{
>  		.desc = "CRC32 instructions",
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 870c39537dd0..56e4ef5d95fa 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
>  HAS_SYSREG_GIC_CPUIF
>  HAS_TLB_RANGE
>  HAS_VIRT_HOST_EXTN
> +HW_AF
>  HW_DBM
>  KVM_PROTECTED_MODE
>  MISMATCHED_CACHE_TYPE

As discussed in the previous threads, we really don't need the complexity
of the additional cap for the arm64 part. Please can you just use the
existing code instead? It's both simpler and, as you say, it's equivalent
for existing hardware.

That way, this patch just ends up being a renaming exercise and we're all
good.

Thanks,

Will
Yu Zhao Jan. 5, 2022, 8:47 p.m. UTC | #2
On Wed, Jan 05, 2022 at 10:45:26AM +0000, Will Deacon wrote:
> On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> > Some architectures automatically set the accessed bit in PTEs, e.g.,
> > x86 and arm64 v8.2. On architectures that don't have this capability,
> > clearing the accessed bit in a PTE usually triggers a page fault
> > following the TLB miss of this PTE.
> > 
> > Being aware of this capability can help make better decisions, e.g.,
> > whether to spread the work out over a period of time to avoid bursty
> > page faults when trying to clear the accessed bit in a large number of
> > PTEs.
> > 
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
> > ---
> >  arch/arm64/include/asm/cpufeature.h |  5 +++++
> >  arch/arm64/include/asm/pgtable.h    | 13 ++++++++-----
> >  arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
> >  arch/arm64/tools/cpucaps            |  1 +
> >  arch/x86/include/asm/pgtable.h      |  6 +++---
> >  include/linux/pgtable.h             | 13 +++++++++++++
> >  mm/memory.c                         | 14 +-------------
> >  7 files changed, 50 insertions(+), 21 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> > index ef6be92b1921..99518b4b2a9e 100644
> > --- a/arch/arm64/include/asm/cpufeature.h
> > +++ b/arch/arm64/include/asm/cpufeature.h
> > @@ -779,6 +779,11 @@ static inline bool system_supports_tlb_range(void)
> >  		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
> >  }
> >  
> > +static inline bool system_has_hw_af(void)
> > +{
> > +	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
> > +}
> > +
> >  extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
> >  
> >  static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index c4ba047a82d2..e736f47436c7 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -999,13 +999,16 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
> >   * page after fork() + CoW for pfn mappings. We don't always have a
> >   * hardware-managed access flag on arm64.
> >   */
> > -static inline bool arch_faults_on_old_pte(void)
> > +static inline bool arch_has_hw_pte_young(bool local)
> >  {
> > -	WARN_ON(preemptible());
> > +	if (local) {
> > +		WARN_ON(preemptible());
> > +		return cpu_has_hw_af();
> > +	}
> >  
> > -	return !cpu_has_hw_af();
> > +	return system_has_hw_af();
> >  }
> > -#define arch_faults_on_old_pte		arch_faults_on_old_pte
> > +#define arch_has_hw_pte_young		arch_has_hw_pte_young
> >  
> >  /*
> >   * Experimentally, it's cheap to set the access flag in hardware and we
> > @@ -1013,7 +1016,7 @@ static inline bool arch_faults_on_old_pte(void)
> >   */
> >  static inline bool arch_wants_old_prefaulted_pte(void)
> >  {
> > -	return !arch_faults_on_old_pte();
> > +	return arch_has_hw_pte_young(true);
> >  }
> >  #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
> >  
> > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> > index 6f3e677d88f1..5bb553ee2c0e 100644
> > --- a/arch/arm64/kernel/cpufeature.c
> > +++ b/arch/arm64/kernel/cpufeature.c
> > @@ -2171,6 +2171,25 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> >  		.matches = has_hw_dbm,
> >  		.cpu_enable = cpu_enable_hw_dbm,
> >  	},
> > +	{
> > +		/*
> > +		 * __cpu_setup always enables this capability. But if the boot
> > +		 * CPU has it and a late CPU doesn't, the absent
> > +		 * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU will prevent this late CPU
> > +		 * from going online. There is no known hardware that does this,
> > +		 * nor any obvious reason to design hardware that works that way,
> > +		 * so there is no point in leaving the door open here. If the need
> > +		 * arises, a new weak system feature flag should do the trick.
> > +		 */
> > +		.desc = "Hardware update of the Access flag",
> > +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> > +		.capability = ARM64_HW_AF,
> > +		.sys_reg = SYS_ID_AA64MMFR1_EL1,
> > +		.sign = FTR_UNSIGNED,
> > +		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
> > +		.min_field_value = 1,
> > +		.matches = has_cpuid_feature,
> > +	},
> >  #endif
> >  	{
> >  		.desc = "CRC32 instructions",
> > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> > index 870c39537dd0..56e4ef5d95fa 100644
> > --- a/arch/arm64/tools/cpucaps
> > +++ b/arch/arm64/tools/cpucaps
> > @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
> >  HAS_SYSREG_GIC_CPUIF
> >  HAS_TLB_RANGE
> >  HAS_VIRT_HOST_EXTN
> > +HW_AF
> >  HW_DBM
> >  KVM_PROTECTED_MODE
> >  MISMATCHED_CACHE_TYPE
> 
> As discussed in the previous threads, we really don't need the complexity
> of the additional cap for the arm64 part. Please can you just use the
> existing code instead? It's both simpler and, as you say, it's equivalent
> for existing hardware.
> 
> That way, this patch just ends up being a renaming exercise and we're all
> good.

No, renaming alone isn't enough. A caller needs to disable preemption
before calling system_has_hw_af(), and I don't think it's reasonable
to ask this caller to do it on x86 as well.

It seems you really prefer not to have HW_AF. So the best I can
accommodate, considering other potential archs, e.g., risc-v (I do
plan to provide benchmark results on risc-v, btw), is:

  static inline bool arch_has_hw_pte_young(bool local)
  {
	bool hw_af;

  	if (local) {
  		WARN_ON(preemptible());
  		return cpu_has_hw_af();
  	}
  
	preempt_disable();
  	hw_af = system_has_hw_af();
	preempt_enable();

	return hw_af;
  }

Or please give me something else I can call without disabling
preemption, sounds good?
Will Deacon Jan. 6, 2022, 10:30 a.m. UTC | #3
On Wed, Jan 05, 2022 at 01:47:08PM -0700, Yu Zhao wrote:
> On Wed, Jan 05, 2022 at 10:45:26AM +0000, Will Deacon wrote:
> > On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> > > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> > > index 870c39537dd0..56e4ef5d95fa 100644
> > > --- a/arch/arm64/tools/cpucaps
> > > +++ b/arch/arm64/tools/cpucaps
> > > @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
> > >  HAS_SYSREG_GIC_CPUIF
> > >  HAS_TLB_RANGE
> > >  HAS_VIRT_HOST_EXTN
> > > +HW_AF
> > >  HW_DBM
> > >  KVM_PROTECTED_MODE
> > >  MISMATCHED_CACHE_TYPE
> > 
> > As discussed in the previous threads, we really don't need the complexity
> > of the additional cap for the arm64 part. Please can you just use the
> > existing code instead? It's both simpler and, as you say, it's equivalent
> > for existing hardware.
> > 
> > That way, this patch just ends up being a renaming exercise and we're all
> > good.
> 
> No, renaming alone isn't enough. A caller needs to disable preemption
> before calling system_has_hw_af(), and I don't think it's reasonable
> to ask this caller to do it on x86 as well.
> 
> It seems you really prefer not to have HW_AF. So the best I can
> accommodate, considering other potential archs, e.g., risc-v (I do
> plan to provide benchmark results on risc-v, btw), is:
> 
>   static inline bool arch_has_hw_pte_young(bool local)
>   {
> 	bool hw_af;
> 
>   	if (local) {
>   		WARN_ON(preemptible());
>   		return cpu_has_hw_af();
>   	}
>   
> 	preempt_disable();
>   	hw_af = system_has_hw_af();
> 	preempt_enable();
> 
> 	return hw_af;
>   }
> 
> Or please give me something else I can call without disabling
> preemption, sounds good?

Sure thing, let me take a look. Do you have your series on a public git
tree someplace?

Cheers,

Will
Yu Zhao Jan. 7, 2022, 7:25 a.m. UTC | #4
On Thu, Jan 06, 2022 at 10:30:09AM +0000, Will Deacon wrote:
> On Wed, Jan 05, 2022 at 01:47:08PM -0700, Yu Zhao wrote:
> > On Wed, Jan 05, 2022 at 10:45:26AM +0000, Will Deacon wrote:
> > > On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> > > > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> > > > index 870c39537dd0..56e4ef5d95fa 100644
> > > > --- a/arch/arm64/tools/cpucaps
> > > > +++ b/arch/arm64/tools/cpucaps
> > > > @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
> > > >  HAS_SYSREG_GIC_CPUIF
> > > >  HAS_TLB_RANGE
> > > >  HAS_VIRT_HOST_EXTN
> > > > +HW_AF
> > > >  HW_DBM
> > > >  KVM_PROTECTED_MODE
> > > >  MISMATCHED_CACHE_TYPE
> > > 
> > > As discussed in the previous threads, we really don't need the complexity
> > > of the additional cap for the arm64 part. Please can you just use the
> > > existing code instead? It's both simpler and, as you say, it's equivalent
> > > for existing hardware.
> > > 
> > > That way, this patch just ends up being a renaming exercise and we're all
> > > good.
> > 
> > No, renaming alone isn't enough. A caller needs to disable preemption
> > before calling system_has_hw_af(), and I don't think it's reasonable
> > to ask this caller to do it on x86 as well.
> > 
> > It seems you really prefer not to have HW_AF. So the best I can
> > accommodate, considering other potential archs, e.g., risc-v (I do
> > plan to provide benchmark results on risc-v, btw), is:
> > 
> >   static inline bool arch_has_hw_pte_young(bool local)
> >   {
> > 	bool hw_af;
> > 
> >   	if (local) {
> >   		WARN_ON(preemptible());
> >   		return cpu_has_hw_af();
> >   	}
> >   
> > 	preempt_disable();
> >   	hw_af = system_has_hw_af();
> > 	preempt_enable();
> > 
> > 	return hw_af;
> >   }
> > 
> > Or please give me something else I can call without disabling
> > preemption, sounds good?
> 
> Sure thing, let me take a look. Do you have your series on a public git
> tree someplace?

Thanks!

This patch (updated) on Gerrit:
https://linux-mm-review.googlesource.com/c/page-reclaim/+/1500/1

And the entire series:
git fetch https://linux-mm.googlesource.com/page-reclaim refs/changes/08/1508/1
Will Deacon Jan. 11, 2022, 2:19 p.m. UTC | #5
On Fri, Jan 07, 2022 at 12:25:07AM -0700, Yu Zhao wrote:
> On Thu, Jan 06, 2022 at 10:30:09AM +0000, Will Deacon wrote:
> > On Wed, Jan 05, 2022 at 01:47:08PM -0700, Yu Zhao wrote:
> > > On Wed, Jan 05, 2022 at 10:45:26AM +0000, Will Deacon wrote:
> > > > On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> > > > > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> > > > > index 870c39537dd0..56e4ef5d95fa 100644
> > > > > --- a/arch/arm64/tools/cpucaps
> > > > > +++ b/arch/arm64/tools/cpucaps
> > > > > @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
> > > > >  HAS_SYSREG_GIC_CPUIF
> > > > >  HAS_TLB_RANGE
> > > > >  HAS_VIRT_HOST_EXTN
> > > > > +HW_AF
> > > > >  HW_DBM
> > > > >  KVM_PROTECTED_MODE
> > > > >  MISMATCHED_CACHE_TYPE
> > > > 
> > > > As discussed in the previous threads, we really don't need the complexity
> > > > of the additional cap for the arm64 part. Please can you just use the
> > > > existing code instead? It's both simpler and, as you say, it's equivalent
> > > > for existing hardware.
> > > > 
> > > > That way, this patch just ends up being a renaming exercise and we're all
> > > > good.
> > > 
> > > No, renaming alone isn't enough. A caller needs to disable preemption
> > > before calling system_has_hw_af(), and I don't think it's reasonable
> > > to ask this caller to do it on x86 as well.
> > > 
> > > It seems you really prefer not to have HW_AF. So the best I can
> > > accommodate, considering other potential archs, e.g., risc-v (I do
> > > plan to provide benchmark results on risc-v, btw), is:
> > > 
> > >   static inline bool arch_has_hw_pte_young(bool local)
> > >   {
> > > 	bool hw_af;
> > > 
> > >   	if (local) {
> > >   		WARN_ON(preemptible());
> > >   		return cpu_has_hw_af();
> > >   	}
> > >   
> > > 	preempt_disable();
> > >   	hw_af = system_has_hw_af();
> > > 	preempt_enable();
> > > 
> > > 	return hw_af;
> > >   }
> > > 
> > > Or please give me something else I can call without disabling
> > > preemption, sounds good?
> > 
> > Sure thing, let me take a look. Do you have your series on a public git
> > tree someplace?
> 
> Thanks!
> 
> This patch (updated) on Gerrit:
> https://linux-mm-review.googlesource.com/c/page-reclaim/+/1500/1

How about folding in something like the diff below? I've basically removed
that 'bool local' argument and dropped the preemptible() check from the
arm64 code.

Will

--->8

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 280123916fc2..990358eca359 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -998,27 +998,14 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
  * the pte is old and cannot be marked young. So we always end up with zeroed
  * page after fork() + CoW for pfn mappings. We don't always have a
  * hardware-managed access flag on arm64.
- *
- * The system-wide support isn't used when involving correctness and therefore
- * is allowed to be flaky.
  */
-static inline bool arch_has_hw_pte_young(bool local)
-{
-	WARN_ON(local && preemptible());
-
-	return cpu_has_hw_af();
-}
-#define arch_has_hw_pte_young		arch_has_hw_pte_young
+#define arch_has_hw_pte_young		cpu_has_hw_af
 
 /*
  * Experimentally, it's cheap to set the access flag in hardware and we
  * benefit from prefaulting mappings as 'old' to start with.
  */
-static inline bool arch_wants_old_prefaulted_pte(void)
-{
-	return arch_has_hw_pte_young(true);
-}
-#define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
+#define arch_wants_old_prefaulted_pte	cpu_has_hw_af
 
 static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c60b16f8b741..3908780fc408 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1398,7 +1398,7 @@ static inline bool arch_has_pfn_modify_check(void)
 }
 
 #define arch_has_hw_pte_young arch_has_hw_pte_young
-static inline bool arch_has_hw_pte_young(bool local)
+static inline bool arch_has_hw_pte_young(void)
 {
 	return true;
 }
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 599cc232d5c4..0bd1beadb545 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -260,15 +260,12 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 
 #ifndef arch_has_hw_pte_young
 /*
- * Return whether the accessed bit is supported by the local CPU or system-wide.
+ * Return whether the accessed bit is supported by the local CPU.
  *
- * This stub assumes accessing thru an old PTE triggers a page fault.
+ * This stub assumes accessing through an old PTE triggers a page fault.
  * Architectures that automatically set the access bit should overwrite it.
- *
- * Note that the system-wide support can be flaky and therefore shouldn't be
- * used when involving correctness.
  */
-static inline bool arch_has_hw_pte_young(bool local)
+static inline bool arch_has_hw_pte_young(void)
 {
 	return false;
 }
diff --git a/mm/memory.c b/mm/memory.c
index ead6c7d4b9a1..1f02de6d51e4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2743,7 +2743,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * On architectures with software "accessed" bits, we would
 	 * take a double page fault, so mark it accessed here.
 	 */
-	if (!arch_has_hw_pte_young(true) && !pte_young(vmf->orig_pte)) {
+	if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
 		pte_t entry;
 
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
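
For context, and not part of the diff above: with the 'bool local' argument
gone and the preemptible() check dropped, a hypothetical advisory caller (the
function name below is made up) can use the helper directly from preemptible
context, since a stale per-CPU answer only affects batching decisions, not
correctness:

  static bool want_batched_aging(void)
  {
	/* No preempt_disable() needed; the result is purely advisory. */
	return !arch_has_hw_pte_young();
  }
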
Yu Zhao Jan. 11, 2022, 10:27 p.m. UTC | #6
On Tue, Jan 11, 2022 at 02:19:02PM +0000, Will Deacon wrote:
> On Fri, Jan 07, 2022 at 12:25:07AM -0700, Yu Zhao wrote:
> > On Thu, Jan 06, 2022 at 10:30:09AM +0000, Will Deacon wrote:
> > > On Wed, Jan 05, 2022 at 01:47:08PM -0700, Yu Zhao wrote:
> > > > On Wed, Jan 05, 2022 at 10:45:26AM +0000, Will Deacon wrote:
> > > > > On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> > > > > > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> > > > > > index 870c39537dd0..56e4ef5d95fa 100644
> > > > > > --- a/arch/arm64/tools/cpucaps
> > > > > > +++ b/arch/arm64/tools/cpucaps
> > > > > > @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
> > > > > >  HAS_SYSREG_GIC_CPUIF
> > > > > >  HAS_TLB_RANGE
> > > > > >  HAS_VIRT_HOST_EXTN
> > > > > > +HW_AF
> > > > > >  HW_DBM
> > > > > >  KVM_PROTECTED_MODE
> > > > > >  MISMATCHED_CACHE_TYPE
> > > > > 
> > > > > As discussed in the previous threads, we really don't need the complexity
> > > > > of the additional cap for the arm64 part. Please can you just use the
> > > > > existing code instead? It's both simpler and, as you say, it's equivalent
> > > > > for existing hardware.
> > > > > 
> > > > > That way, this patch just ends up being a renaming exercise and we're all
> > > > > good.
> > > > 
> > > > No, renaming alone isn't enough. A caller needs to disable preemption
> > > > before calling system_has_hw_af(), and I don't think it's reasonable
> > > > to ask this caller to do it on x86 as well.
> > > > 
> > > > It seems you really prefer not to have HW_AF. So the best I can
> > > > accommodate, considering other potential archs, e.g., risc-v (I do
> > > > plan to provide benchmark results on risc-v, btw), is:
> > > > 
> > > >   static inline bool arch_has_hw_pte_young(bool local)
> > > >   {
> > > > 	bool hw_af;
> > > > 
> > > >   	if (local) {
> > > >   		WARN_ON(preemptible());
> > > >   		return cpu_has_hw_af();
> > > >   	}
> > > >   
> > > > 	preempt_disable();
> > > >   	hw_af = system_has_hw_af();
> > > > 	preempt_enable();
> > > > 
> > > > 	return hw_af;
> > > >   }
> > > > 
> > > > Or please give me something else I can call without disabling
> > > > preemption, sounds good?
> > > 
> > > Sure thing, let me take a look. Do you have your series on a public git
> > > tree someplace?
> > 
> > Thanks!
> > 
> > This patch (updated) on Gerrit:
> > https://linux-mm-review.googlesource.com/c/page-reclaim/+/1500/1
> 
> How about folding in something like the diff below? I've basically removed
> that 'bool local' argument and dropped the preemptible() check from the
> arm64 code.

This looks great, thanks.

Patch

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ef6be92b1921..99518b4b2a9e 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -779,6 +779,11 @@  static inline bool system_supports_tlb_range(void)
 		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
 }
 
+static inline bool system_has_hw_af(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
+}
+
 extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
 
 static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c4ba047a82d2..e736f47436c7 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -999,13 +999,16 @@  static inline void update_mmu_cache(struct vm_area_struct *vma,
  * page after fork() + CoW for pfn mappings. We don't always have a
  * hardware-managed access flag on arm64.
  */
-static inline bool arch_faults_on_old_pte(void)
+static inline bool arch_has_hw_pte_young(bool local)
 {
-	WARN_ON(preemptible());
+	if (local) {
+		WARN_ON(preemptible());
+		return cpu_has_hw_af();
+	}
 
-	return !cpu_has_hw_af();
+	return system_has_hw_af();
 }
-#define arch_faults_on_old_pte		arch_faults_on_old_pte
+#define arch_has_hw_pte_young		arch_has_hw_pte_young
 
 /*
  * Experimentally, it's cheap to set the access flag in hardware and we
@@ -1013,7 +1016,7 @@  static inline bool arch_faults_on_old_pte(void)
  */
 static inline bool arch_wants_old_prefaulted_pte(void)
 {
-	return !arch_faults_on_old_pte();
+	return arch_has_hw_pte_young(true);
 }
 #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6f3e677d88f1..5bb553ee2c0e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2171,6 +2171,25 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_hw_dbm,
 		.cpu_enable = cpu_enable_hw_dbm,
 	},
+	{
+		/*
+		 * __cpu_setup always enables this capability. But if the boot
+		 * CPU has it and a late CPU doesn't, the absent
+		 * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU will prevent this late CPU
+		 * from going online. There is no known hardware that does this,
+		 * nor any obvious reason to design hardware that works that way,
+		 * so there is no point in leaving the door open here. If the need
+		 * arises, a new weak system feature flag should do the trick.
+		 */
+		.desc = "Hardware update of the Access flag",
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.capability = ARM64_HW_AF,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.min_field_value = 1,
+		.matches = has_cpuid_feature,
+	},
 #endif
 	{
 		.desc = "CRC32 instructions",
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 870c39537dd0..56e4ef5d95fa 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -36,6 +36,7 @@  HAS_STAGE2_FWB
 HAS_SYSREG_GIC_CPUIF
 HAS_TLB_RANGE
 HAS_VIRT_HOST_EXTN
+HW_AF
 HW_DBM
 KVM_PROTECTED_MODE
 MISMATCHED_CACHE_TYPE
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ec..c60b16f8b741 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1397,10 +1397,10 @@  static inline bool arch_has_pfn_modify_check(void)
 	return boot_cpu_has_bug(X86_BUG_L1TF);
 }
 
-#define arch_faults_on_old_pte arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
+#define arch_has_hw_pte_young arch_has_hw_pte_young
+static inline bool arch_has_hw_pte_young(bool local)
 {
-	return false;
+	return true;
 }
 
 #endif	/* __ASSEMBLY__ */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e24d2c992b11..53bd6a26918f 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -258,6 +258,19 @@  static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef arch_has_hw_pte_young
+/*
+ * Return whether the accessed bit is supported by the local CPU or system-wide.
+ *
+ * This stub assumes accessing thru an old PTE triggers a page fault.
+ * Architectures that automatically set the access bit should overwrite it.
+ */
+static inline bool arch_has_hw_pte_young(bool local)
+{
+	return false;
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 8f1de811a1dc..ead6c7d4b9a1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -121,18 +121,6 @@  int randomize_va_space __read_mostly =
 					2;
 #endif
 
-#ifndef arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
-{
-	/*
-	 * Those arches which don't have hw access flag feature need to
-	 * implement their own helper. By default, "true" means pagefault
-	 * will be hit on old pte.
-	 */
-	return true;
-}
-#endif
-
 #ifndef arch_wants_old_prefaulted_pte
 static inline bool arch_wants_old_prefaulted_pte(void)
 {
@@ -2755,7 +2743,7 @@  static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * On architectures with software "accessed" bits, we would
 	 * take a double page fault, so mark it accessed here.
 	 */
-	if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
+	if (!arch_has_hw_pte_young(true) && !pte_young(vmf->orig_pte)) {
 		pte_t entry;
 
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);