diff mbox series

arm64: Get rid of ARM64_HAS_NO_HW_PREFETCH

Message ID 20231122133754.1240687-1-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series arm64: Get rid of ARM64_HAS_NO_HW_PREFETCH | expand

Commit Message

Marc Zyngier Nov. 22, 2023, 1:37 p.m. UTC
Back in 2016, it was argued that implementations lacking a HW
prefetcher could be helped by sprinkling a number of PRFM
instructions in strategic locations.

In 2023, the one platform that presumably needed this hack is no
longer in active use (let alone maintained), and a quick
experiment shows dropping this hack only leads to a 0.4% drop
on a full kernel compilation (tested on a MT30-GS0 48 CPU system).

Given that this is pretty much in the noise department and that
it may give odd ideas to other implementers, drop the hack for
good.

Suggested-by: Will Deacon <will@kernel.org>
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 16 ----------------
 arch/arm64/lib/copy_page.S     | 11 -----------
 arch/arm64/tools/cpucaps       |  1 -
 3 files changed, 28 deletions(-)

Comments

Catalin Marinas Nov. 23, 2023, 7:24 p.m. UTC | #1
On Wed, Nov 22, 2023 at 01:37:54PM +0000, Marc Zyngier wrote:
> Back in 2016, it was argued that implementations lacking a HW
> prefetcher could be helped by sprinkling a number of PRFM
> instructions in strategic locations.
> 
> In 2023, the one platform that presumably needed this hack is no
> longer in active use (let alone maintained), and an quick
> experiment shows dropping this hack only leads to a 0.4% drop
> on a full kernel compilation (tested on a MT30-GS0 48 CPU system).
> 
> Given that this is pretty much in the noise department and that
> it may give odd ideas to other implementers, drop the hack for
> good.
> 
> Suggested-by: Will Deacon <will@kernel.org>
> Suggested-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Will Deacon Dec. 5, 2023, 3:16 p.m. UTC | #2
On Wed, 22 Nov 2023 13:37:54 +0000, Marc Zyngier wrote:
> Back in 2016, it was argued that implementations lacking a HW
> prefetcher could be helped by sprinkling a number of PRFM
> instructions in strategic locations.
> 
> In 2023, the one platform that presumably needed this hack is no
> longer in active use (let alone maintained), and an quick
> experiment shows dropping this hack only leads to a 0.4% drop
> on a full kernel compilation (tested on a MT30-GS0 48 CPU system).
> 
> [...]

Applied to arm64 (for-next/cpufeature), thanks!

[1/1] arm64: Get rid of ARM64_HAS_NO_HW_PREFETCH
      https://git.kernel.org/arm64/c/103423ad7e56

Cheers,
diff mbox series

Patch

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646591c67e7a..b335da126e86 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1584,16 +1584,6 @@  static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
 	return has_sre;
 }
 
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
-{
-	u32 midr = read_cpuid_id();
-
-	/* Cavium ThunderX pass 1.x and 2.x */
-	return midr_is_cpu_model_range(midr, MIDR_THUNDERX,
-		MIDR_CPU_VAR_REV(0, 0),
-		MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
-}
-
 static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
 			  int scope)
 {
@@ -2321,12 +2311,6 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
 	},
 #endif /* CONFIG_ARM64_LSE_ATOMICS */
-	{
-		.desc = "Software prefetching using PRFM",
-		.capability = ARM64_HAS_NO_HW_PREFETCH,
-		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
-		.matches = has_no_hw_prefetch,
-	},
 	{
 		.desc = "Virtualization Host Extensions",
 		.capability = ARM64_HAS_VIRT_HOST_EXTN,
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index c336d2ffdec5..6a56d7cf309d 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,13 +18,6 @@ 
  *	x1 - src
  */
 SYM_FUNC_START(__pi_copy_page)
-alternative_if ARM64_HAS_NO_HW_PREFETCH
-	// Prefetch three cache lines ahead.
-	prfm	pldl1strm, [x1, #128]
-	prfm	pldl1strm, [x1, #256]
-	prfm	pldl1strm, [x1, #384]
-alternative_else_nop_endif
-
 	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
@@ -39,10 +32,6 @@  alternative_else_nop_endif
 1:
 	tst	x0, #(PAGE_SIZE - 1)
 
-alternative_if ARM64_HAS_NO_HW_PREFETCH
-	prfm	pldl1strm, [x1, #384]
-alternative_else_nop_endif
-
 	stnp	x2, x3, [x0, #-256]
 	ldp	x2, x3, [x1]
 	stnp	x4, x5, [x0, #16 - 256]
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index b98c38288a9d..0eb2a2d2f783 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -40,7 +40,6 @@  HAS_LDAPR
 HAS_LSE_ATOMICS
 HAS_MOPS
 HAS_NESTED_VIRT
-HAS_NO_HW_PREFETCH
 HAS_PAN
 HAS_S1PIE
 HAS_RAS_EXTN