diff mbox series

[v11,05/12] x86/mm: add INVLPGB support code

Message ID 20250213161423.449435-6-riel@surriel.com (mailing list archive)
State New
Headers show
Series AMD broadcast TLB invalidation | expand

Commit Message

Rik van Riel Feb. 13, 2025, 4:13 p.m. UTC
Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.

Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
---
 arch/x86/include/asm/disabled-features.h |   8 +-
 arch/x86/include/asm/invlpgb.h           | 101 +++++++++++++++++++++++
 arch/x86/include/asm/tlbflush.h          |   1 +
 3 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/invlpgb.h

Comments

Dave Hansen Feb. 14, 2025, 6:22 p.m. UTC | #1
On 2/13/25 08:13, Rik van Riel wrote:
> Add invlpgb.h with the helper functions and definitions needed to use
> broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.

You should also note here that (or if??) all these functions get used
later in the series.

> +/* Flush addr, including globals, for all PCIDs. */
> +static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
> +{
> +	__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
> +}

Something about the "nr - 1"'s needs to get mentioned *somewhere*. I
think the best place is actually in __invlpgb(). Basically make the
calling convention for __invlpgb() be the _sane_ thing where nr==1
flushes 1 page. Then do the nr-=1 in __invlpgb() and document why.

I don't mean to insult the AMD ISA designers here. I might have done the
same thing. But the software to use the instruction ends up looking
really funky. It would be great to limit the number of places that deal
with the funkiness to exactly 1.

With those two nits addressed:

Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Rik van Riel Feb. 18, 2025, 5:23 p.m. UTC | #2
On Fri, 2025-02-14 at 10:22 -0800, Dave Hansen wrote:
> On 2/13/25 08:13, Rik van Riel wrote:
> > Add invlpgb.h with the helper functions and definitions needed to
> > use
> > broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
> 
> You should also note here that (or if??) all these functions get used
> later in the series.
> 
You made me look, and there was indeed one helper
function that is not being used. I removed it,
and added in the commit message that all the (remaining)
functions are used.


> > +/* Flush addr, including globals, for all PCIDs. */
> > +static inline void invlpgb_flush_addr_nosync(unsigned long addr,
> > u16 nr)
> > +{
> > +	__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
> > +}
> 
> Something about the "nr - 1"'s needs to get mentioned *somewhere*. I
> think the best place is actually in __invlpgb(). Basically make the
> calling convention for __invlpgb() be the _sane_ thing where nr==1
> flushes 1 page. Then do the nr-=1 in __invlpgb() and document why.
> 
> I don't mean to insult the AMD ISA designers here. I might have done
> the
> same thing. But the software to use the instruction ends up looking
> really funky. It would be great to limit the number of places that
> deal
> with the funkiness to exactly 1.
> 
> With those two nits addressed:
> 
> Acked-by: Dave Hansen <dave.hansen@linux.intel.com>

Nits addressed, and the Acked-by header added to the
next version :)

Thank you!
Borislav Petkov Feb. 19, 2025, 12:04 p.m. UTC | #3
On Thu, Feb 13, 2025 at 11:13:56AM -0500, Rik van Riel wrote:
> Add invlpgb.h with the helper functions and definitions needed to use
> broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.
> 
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> ---
>  arch/x86/include/asm/disabled-features.h |   8 +-
>  arch/x86/include/asm/invlpgb.h           | 101 +++++++++++++++++++++++
>  arch/x86/include/asm/tlbflush.h          |   1 +
>  3 files changed, 109 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/include/asm/invlpgb.h
> 
> diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
> index c492bdc97b05..625a89259968 100644
> --- a/arch/x86/include/asm/disabled-features.h
> +++ b/arch/x86/include/asm/disabled-features.h
> @@ -129,6 +129,12 @@
>  #define DISABLE_SEV_SNP		(1 << (X86_FEATURE_SEV_SNP & 31))
>  #endif
>  
> +#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
> +#define DISABLE_INVLPGB		0
> +#else
> +#define DISABLE_INVLPGB		(1 << (X86_FEATURE_INVLPGB & 31))
> +#endif
> +

What does that bring you really in savings?

>  /*
>   * Make sure to add features to the correct mask
>   */
> @@ -146,7 +152,7 @@
>  #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
>  			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
>  #define DISABLED_MASK12	(DISABLE_FRED|DISABLE_LAM)
> -#define DISABLED_MASK13	0
> +#define DISABLED_MASK13	(DISABLE_INVLPGB)
>  #define DISABLED_MASK14	0
>  #define DISABLED_MASK15	0
>  #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
> diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h
> new file mode 100644
> index 000000000000..a1d5dedd5217
> --- /dev/null
> +++ b/arch/x86/include/asm/invlpgb.h

I remember asking you to add all that gunk to arch/x86/include/asm/tlb.h.
Please do so.

> +#define INVLPGB_VA			BIT(0)
> +#define INVLPGB_PCID			BIT(1)
> +#define INVLPGB_ASID			BIT(2)
> +#define INVLPGB_INCLUDE_GLOBAL		BIT(3)
> +#define INVLPGB_FINAL_ONLY		BIT(4)
> +#define INVLPGB_INCLUDE_NESTED		BIT(5)
> +
> +/* Flush all mappings for a given pcid and addr, not including globals. */
> +static inline void invlpgb_flush_user(unsigned long pcid,
> +				      unsigned long addr)

Please drop this unused function.
Rik van Riel Feb. 19, 2025, 5:42 p.m. UTC | #4
On Wed, 2025-02-19 at 13:04 +0100, Borislav Petkov wrote:
> On Thu, Feb 13, 2025 at 11:13:56AM -0500, Rik van Riel wrote:
> > 
> > index 000000000000..a1d5dedd5217
> > --- /dev/null
> > +++ b/arch/x86/include/asm/invlpgb.h
> 
> I remember asking you to add all that gunk to
> arch/x86/include/asm/tlb.h.
> Please do so.

Dave just asked me to split out more things into
their own files.

I'm happy to do whatever the maintainers want,
but when you both want the opposite from each
other, I won't be able to make you both happy.

What should I be doing here?
Dave Hansen Feb. 19, 2025, 7:01 p.m. UTC | #5
On 2/19/25 09:42, Rik van Riel wrote:
> On Wed, 2025-02-19 at 13:04 +0100, Borislav Petkov wrote:
>> On Thu, Feb 13, 2025 at 11:13:56AM -0500, Rik van Riel wrote:
>>>
>>> index 000000000000..a1d5dedd5217
>>> --- /dev/null
>>> +++ b/arch/x86/include/asm/invlpgb.h
>>
>> I remember asking you to add all that gunk to
>> arch/x86/include/asm/tlb.h.
>> Please do so.
> 
> Dave just asked me to split out more things into
> their own files.
> 
> I'm happy to do whatever the maintainers want,
> but when you both want the opposite from each
> other, I won't be able to make you both happy.
> 
> What should I be doing here?

I think you're referring to this:

> https://lore.kernel.org/all/b067a9fc-ff5f-4baa-a1ff-3fa749ae4d44@intel.com/

I don't have a strong preference about creating an invlpgb.h header. As
long as the header is still relatively small the #ifdef pile is
readable, it's fine to stick in an existing header.

The thing I raised about the 09/12 patch was a large #ifdef in a .c
file. We have a general rule to avoid #ifdefs in .c files. Specifically,
I find that having large #ifdef's regions in a .c file means that you
literally can't see both sides of the #ifdef at once or easily
understand which code belongs to the #ifdef. The result is invariably
weird compile issues that pop up.

But, either way, #ifdefs are a sign of weakness. Less so in a header and
more so in a .c file.
Borislav Petkov Feb. 19, 2025, 7:15 p.m. UTC | #6
On Wed, Feb 19, 2025 at 11:01:17AM -0800, Dave Hansen wrote:
> But, either way, #ifdefs are a sign of weakness. Less so in a header and
> more so in a .c file.

... and, as we just discussed and agreed on chat, we don't need the Kconfig
option either.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c492bdc97b05..625a89259968 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -129,6 +129,12 @@ 
 #define DISABLE_SEV_SNP		(1 << (X86_FEATURE_SEV_SNP & 31))
 #endif
 
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+#define DISABLE_INVLPGB		0
+#else
+#define DISABLE_INVLPGB		(1 << (X86_FEATURE_INVLPGB & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -146,7 +152,7 @@ 
 #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
 			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
 #define DISABLED_MASK12	(DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13	0
+#define DISABLED_MASK13	(DISABLE_INVLPGB)
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
diff --git a/arch/x86/include/asm/invlpgb.h b/arch/x86/include/asm/invlpgb.h
new file mode 100644
index 000000000000..a1d5dedd5217
--- /dev/null
+++ b/arch/x86/include/asm/invlpgb.h
@@ -0,0 +1,101 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVLPGB
+#define _ASM_X86_INVLPGB
+
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 extra_count,
+			     bool pmd_stride, u8 flags)
+{
+	u32 edx = (pcid << 16) | asid;
+	u32 ecx = (pmd_stride << 31) | extra_count;
+	u64 rax = addr | flags;
+
+	/* The low bits in rax are for flags. Verify addr is clean. */
+	VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+	/* INVLPGB; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx));
+}
+
+/* Wait for INVLPGB originated by this CPU to complete. */
+static inline void tlbsync(void)
+{
+	cant_migrate();
+	/* TLBSYNC: supported in binutils >= 0.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - INVLPGB_PCID:			  invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA			BIT(0)
+#define INVLPGB_PCID			BIT(1)
+#define INVLPGB_ASID			BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL		BIT(3)
+#define INVLPGB_FINAL_ONLY		BIT(4)
+#define INVLPGB_INCLUDE_NESTED		BIT(5)
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invlpgb_flush_user(unsigned long pcid,
+				      unsigned long addr)
+{
+	__invlpgb(0, pcid, addr, 0, 0, INVLPGB_PCID | INVLPGB_VA);
+	tlbsync();
+}
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr,
+						bool pmd_stride)
+{
+	__invlpgb(0, pcid, addr, nr - 1, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+	__invlpgb(0, pcid, 0, 0, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+	__invlpgb(0, 0, 0, 0, 0, INVLPGB_INCLUDE_GLOBAL);
+	tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+	__invlpgb(0, 0, addr, nr - 1, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+	__invlpgb(0, 0, 0, 0, 0, 0);
+	tlbsync();
+}
+
+#endif /* _ASM_X86_INVLPGB */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e026a5cc388e..bda7080dec83 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -10,6 +10,7 @@ 
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invlpgb.h>
 #include <asm/invpcid.h>
 #include <asm/pti.h>
 #include <asm/processor-flags.h>