[v4,01/11] mm: x86, arm64: add arch_has_hw_pte_young()

Message ID: 20210818063107.2696454-2-yuzhao@google.com
State: New
Series: Multigenerational LRU Framework

Commit Message

Yu Zhao Aug. 18, 2021, 6:30 a.m. UTC
Some architectures set the accessed bit in PTEs automatically, e.g.,
x86, and arm64 v8.2 and later. On architectures that do not have this
capability, clearing the accessed bit in a PTE triggers a page fault
following the TLB miss.

Being aware of this capability can help make better decisions, i.e.,
whether to limit the size of each batch of PTEs and the burst of
batches when clearing the accessed bit.

Signed-off-by: Yu Zhao <yuzhao@google.com>
---
 arch/arm64/include/asm/cpufeature.h | 19 ++++++-------------
 arch/arm64/include/asm/pgtable.h    | 10 ++++------
 arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
 arch/arm64/mm/proc.S                | 12 ------------
 arch/arm64/tools/cpucaps            |  1 +
 arch/x86/include/asm/pgtable.h      |  6 +++---
 include/linux/pgtable.h             | 12 ++++++++++++
 mm/memory.c                         | 14 +-------------
 8 files changed, 46 insertions(+), 47 deletions(-)
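
As a concrete illustration of the decision the commit message refers to, here is a minimal sketch of a caller that sizes its batches based on the new helper; the function name and constants (pte_scan_batch(), PTE_BATCH_HW, PTE_BATCH_SW) are assumptions for illustration only, not part of this series:

/*
 * Illustrative sketch only: clear the accessed bit on fewer PTEs per
 * batch when the accessed bit is maintained in software, since every
 * cleared bit then costs an extra page fault on the next access.
 * arch_has_hw_pte_young() comes from include/linux/pgtable.h as added
 * by this patch.
 */
#define PTE_BATCH_HW	64	/* example values, not tuned */
#define PTE_BATCH_SW	8

static unsigned int pte_scan_batch(void)
{
	return arch_has_hw_pte_young() ? PTE_BATCH_HW : PTE_BATCH_SW;
}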

Comments

Will Deacon Aug. 19, 2021, 9:19 a.m. UTC | #1
On Wed, Aug 18, 2021 at 12:30:57AM -0600, Yu Zhao wrote:
> Some architectures set the accessed bit in PTEs automatically, e.g.,
> x86, and arm64 v8.2 and later. On architectures that do not have this
> capability, clearing the accessed bit in a PTE triggers a page fault
> following the TLB miss.
> 
> Being aware of this capability can help make better decisions, i.e.,
> whether to limit the size of each batch of PTEs and the burst of
> batches when clearing the accessed bit.
> 
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> ---
>  arch/arm64/include/asm/cpufeature.h | 19 ++++++-------------
>  arch/arm64/include/asm/pgtable.h    | 10 ++++------
>  arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
>  arch/arm64/mm/proc.S                | 12 ------------
>  arch/arm64/tools/cpucaps            |  1 +
>  arch/x86/include/asm/pgtable.h      |  6 +++---
>  include/linux/pgtable.h             | 12 ++++++++++++
>  mm/memory.c                         | 14 +-------------
>  8 files changed, 46 insertions(+), 47 deletions(-)

Please cc linux-arm-kernel and the maintainers on arm64 patches.

> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 9bb9d11750d7..2020b9e818c8 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -776,6 +776,12 @@ static inline bool system_supports_tlb_range(void)
>  		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
>  }
>  
> +/* Check whether hardware update of the Access flag is supported. */
> +static inline bool system_has_hw_af(void)
> +{
> +	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
> +}

How accurate does this need to be? Heterogeneous (big/little) systems are
very common on arm64, so the existing code enables hardware access flag
unconditionally on CPUs that support it, meaning we could end up running
on a system where some CPUs have hardware update and others do not.

With your change, we only enable hardware access flag if _all_ CPUs support
it (and furthermore, we prevent late onlining of CPUs without the feature
if it was detected at boot). This sacrifices a lot of flexibility, particularly
if we end up tackling CPU errata in this area in future, and it's not clear
that it's really required for what you're trying to do.

Will
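
For context: the flexibility described above corresponds to the arm64 capability type used. A hedged sketch of the per-CPU alternative -- registering the capability as a weak local CPU feature, as HW_DBM already does, rather than as a system feature. This entry is an assumption for illustration, not code from this series, and its cpu_enable hook would need to re-check the local CPU the way cpu_enable_hw_dbm() does:

	{
		.desc = "Hardware update of the Access flag",
		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,	/* per-CPU; late CPUs without it are allowed */
		.capability = ARM64_HW_AF,
		.sys_reg = SYS_ID_AA64MMFR1_EL1,
		.sign = FTR_UNSIGNED,
		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
		.min_field_value = 1,
		.matches = has_cpuid_feature,
		.cpu_enable = cpu_enable_hw_af,		/* would need a local re-check here */
	},

With ARM64_CPUCAP_SYSTEM_FEATURE, as in this patch, the capability is only set if every boot CPU has the feature, and late CPUs lacking it are prevented from onlining once it has been detected.
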
Yu Zhao Aug. 19, 2021, 9:23 p.m. UTC | #2
On Thu, Aug 19, 2021 at 3:19 AM Will Deacon <will@kernel.org> wrote:
>
> On Wed, Aug 18, 2021 at 12:30:57AM -0600, Yu Zhao wrote:
> > Some architectures set the accessed bit in PTEs automatically, e.g.,
> > x86, and arm64 v8.2 and later. On architectures that do not have this
> > capability, clearing the accessed bit in a PTE triggers a page fault
> > following the TLB miss.
> >
> > Being aware of this capability can help make better decisions, i.e.,
> > whether to limit the size of each batch of PTEs and the burst of
> > batches when clearing the accessed bit.
> >
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > ---
> >  arch/arm64/include/asm/cpufeature.h | 19 ++++++-------------
> >  arch/arm64/include/asm/pgtable.h    | 10 ++++------
> >  arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
> >  arch/arm64/mm/proc.S                | 12 ------------
> >  arch/arm64/tools/cpucaps            |  1 +
> >  arch/x86/include/asm/pgtable.h      |  6 +++---
> >  include/linux/pgtable.h             | 12 ++++++++++++
> >  mm/memory.c                         | 14 +-------------
> >  8 files changed, 46 insertions(+), 47 deletions(-)
>
> Please cc linux-arm-kernel and the maintainers on arm64 patches.

Done. Also adding a link to the original post:
https://lore.kernel.org/patchwork/patch/1478354/

> > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> > index 9bb9d11750d7..2020b9e818c8 100644
> > --- a/arch/arm64/include/asm/cpufeature.h
> > +++ b/arch/arm64/include/asm/cpufeature.h
> > @@ -776,6 +776,12 @@ static inline bool system_supports_tlb_range(void)
> >               cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
> >  }
> >
> > +/* Check whether hardware update of the Access flag is supported. */
> > +static inline bool system_has_hw_af(void)
> > +{
> > +     return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
> > +}
>
> How accurate does this need to be? Heterogeneous (big/little) systems are
> very common on arm64, so the existing code enables hardware access flag
> unconditionally on CPUs that support it, meaning we could end up running
> on a system where some CPUs have hardware update and others do not.
>
> With your change, we only enable hardware access flag if _all_ CPUs support
> it (and furthermore, we prevent late onlining of CPUs without the feature
> if it was detected at boot). This sacrifices a lot of flexibility, particularly
> if we end up tackling CPU errata in this area in future, and it's not clear
> that it's really required for what you're trying to do.

It doesn't need to be accurate but then my question is how helpful it
is if it's not accurate. Conversely, shouldn't all CPUs have it if
it's really helpful? So it seems to me that whether such flexibility
is needed in the future is questionable -- AFAIK, no CPUs (ARM or not)
behave that way at present. I agree we want to try to be future-proof,
but that usually comes at a cost. For this specific case, we would need
two functions to detect the capability at the global and local levels
to fully explore this theoretical flexibility.

The bottom line is that I don't have a problem with having an
additional function to detect the capability at the global level. Note
that the specific concern in this patchset is that if a CPU assumes all
other CPUs have the capability and clears the accessed bit on many
PTEs, then the CPUs that lack the capability may suffer page faults as
a result. (This is different from the cow_user_page() case.)
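
To illustrate the two-level detection mentioned above, a minimal sketch built on the ARM64_HW_AF capability added by this patch; system_has_hw_af() is the helper from the patch, while this_cpu_has_hw_af() is a hypothetical name wrapping the existing this_cpu_has_cap() helper (which must be called with preemption disabled):

/* Global: the capability was detected system-wide at boot. */
static inline bool system_has_hw_af(void)
{
	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
	       cpus_have_const_cap(ARM64_HW_AF);
}

/* Local: only the CPU we are currently running on has the feature. */
static inline bool this_cpu_has_hw_af(void)
{
	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
	       this_cpu_has_cap(ARM64_HW_AF);
}
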
Hillf Danton Oct. 10, 2021, 8:59 a.m. UTC | #3
On Thu, 19 Aug 2021 15:23:02 -0600 Yu Zhao wrote:
>On Thu, Aug 19, 2021 at 3:19 AM Will Deacon <will@kernel.org> wrote:
>>
>> How accurate does this need to be? Heterogeneous (big/little) systems are
>> very common on arm64, so the existing code enables hardware access flag
>> unconditionally on CPUs that support it, meaning we could end up running
>> on a system where some CPUs have hardware update and others do not.
>>
>> With your change, we only enable hardware access flag if _all_ CPUs support
>> it (and furthermore, we prevent late onlining of CPUs without the feature
>> if it was detected at boot). This sacrifices a lot of flexibility, particularly
>> if we end up tackling CPU errata in this area in future, and it's not clear
>> that it's really required for what you're trying to do.
>
>It doesn't need to be accurate but then my question is how helpful it
>is if it's not accurate.

Alternatively, to make the issue simpler, spin the series without
arm64 included, given that it will be revisited once MGLRU lands in the
mainline tree.

Hillf

Patch

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9bb9d11750d7..2020b9e818c8 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -776,6 +776,12 @@  static inline bool system_supports_tlb_range(void)
 		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
 }
 
+/* Check whether hardware update of the Access flag is supported. */
+static inline bool system_has_hw_af(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
+}
+
 extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
 
 static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
@@ -799,19 +805,6 @@  static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
 	}
 }
 
-/* Check whether hardware update of the Access flag is supported */
-static inline bool cpu_has_hw_af(void)
-{
-	u64 mmfr1;
-
-	if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
-		return false;
-
-	mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
-	return cpuid_feature_extract_unsigned_field(mmfr1,
-						ID_AA64MMFR1_HADBS_SHIFT);
-}
-
 static inline bool cpu_has_pan(void)
 {
 	u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index f09bf5c02891..b63a6a7b62ee 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -993,13 +993,11 @@  static inline void update_mmu_cache(struct vm_area_struct *vma,
  * page after fork() + CoW for pfn mappings. We don't always have a
  * hardware-managed access flag on arm64.
  */
-static inline bool arch_faults_on_old_pte(void)
+static inline bool arch_has_hw_pte_young(void)
 {
-	WARN_ON(preemptible());
-
-	return !cpu_has_hw_af();
+	return system_has_hw_af();
 }
-#define arch_faults_on_old_pte		arch_faults_on_old_pte
+#define arch_has_hw_pte_young		arch_has_hw_pte_young
 
 /*
  * Experimentally, it's cheap to set the access flag in hardware and we
@@ -1007,7 +1005,7 @@  static inline bool arch_faults_on_old_pte(void)
  */
 static inline bool arch_wants_old_prefaulted_pte(void)
 {
-	return !arch_faults_on_old_pte();
+	return arch_has_hw_pte_young();
 }
 #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0ead8bfedf20..d05de77626f5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1650,6 +1650,14 @@  static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
 	return true;
 }
 
+static void cpu_enable_hw_af(struct arm64_cpu_capabilities const *cap)
+{
+	u64 val = read_sysreg(tcr_el1);
+
+	write_sysreg(val | TCR_HA, tcr_el1);
+	isb();
+	local_flush_tlb_all();
+}
 #endif
 
 #ifdef CONFIG_ARM64_AMU_EXTN
@@ -2126,6 +2134,17 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_hw_dbm,
 		.cpu_enable = cpu_enable_hw_dbm,
 	},
+	{
+		.desc = "Hardware update of the Access flag",
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.capability = ARM64_HW_AF,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.min_field_value = 1,
+		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_enable_hw_af,
+	},
 #endif
 	{
 		.desc = "CRC32 instructions",
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 35936c5ae1ce..b066d5712e3d 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -478,18 +478,6 @@  SYM_FUNC_START(__cpu_setup)
 	 * Set the IPS bits in TCR_EL1.
 	 */
 	tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6
-#ifdef CONFIG_ARM64_HW_AFDBM
-	/*
-	 * Enable hardware update of the Access Flags bit.
-	 * Hardware dirty bit management is enabled later,
-	 * via capabilities.
-	 */
-	mrs	x9, ID_AA64MMFR1_EL1
-	and	x9, x9, #0xf
-	cbz	x9, 1f
-	orr	tcr, tcr, #TCR_HA		// hardware Access flag update
-1:
-#endif	/* CONFIG_ARM64_HW_AFDBM */
 	msr	mair_el1, mair
 	msr	tcr_el1, tcr
 	/*
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 49305c2e6dfd..d52f50671e60 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -35,6 +35,7 @@  HAS_STAGE2_FWB
 HAS_SYSREG_GIC_CPUIF
 HAS_TLB_RANGE
 HAS_VIRT_HOST_EXTN
+HW_AF
 HW_DBM
 KVM_PROTECTED_MODE
 MISMATCHED_CACHE_TYPE
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ec..3908780fc408 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1397,10 +1397,10 @@  static inline bool arch_has_pfn_modify_check(void)
 	return boot_cpu_has_bug(X86_BUG_L1TF);
 }
 
-#define arch_faults_on_old_pte arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
+#define arch_has_hw_pte_young arch_has_hw_pte_young
+static inline bool arch_has_hw_pte_young(void)
 {
-	return false;
+	return true;
 }
 
 #endif	/* __ASSEMBLY__ */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e24d2c992b11..3a8221fa2c76 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -258,6 +258,18 @@  static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef arch_has_hw_pte_young
+static inline bool arch_has_hw_pte_young(void)
+{
+	/*
+	 * Those arches which have hw access flag feature need to implement
+	 * their own helper. By default, "false" means pagefault will be hit
+	 * on old pte.
+	 */
+	return false;
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 25fc46e87214..2f96179db219 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -121,18 +121,6 @@  int randomize_va_space __read_mostly =
 					2;
 #endif
 
-#ifndef arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
-{
-	/*
-	 * Those arches which don't have hw access flag feature need to
-	 * implement their own helper. By default, "true" means pagefault
-	 * will be hit on old pte.
-	 */
-	return true;
-}
-#endif
-
 #ifndef arch_wants_old_prefaulted_pte
 static inline bool arch_wants_old_prefaulted_pte(void)
 {
@@ -2769,7 +2757,7 @@  static inline bool cow_user_page(struct page *dst, struct page *src,
 	 * On architectures with software "accessed" bits, we would
 	 * take a double page fault, so mark it accessed here.
 	 */
-	if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
+	if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
 		pte_t entry;
 
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);