Message ID | 20250206044346.3810242-5-riel@surriel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | AMD broadcast TLB invalidation | expand |
On Thu, 6 Feb 2025 at 05:45, Rik van Riel <riel@surriel.com> wrote: > diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h > index 17b6590748c0..f9b832e971c5 100644 > --- a/arch/x86/include/asm/cpufeatures.h > +++ b/arch/x86/include/asm/cpufeatures.h > @@ -338,6 +338,7 @@ > #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ > #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ > #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ > +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ Why no "invlpgb" here? Seems like having this flag visible in cpuinfo would be worthwhile. If there's a reason to hide it maybe add a comment to explain the reason? Sorry if this is a stupid question - I also can't see an obvious rationale for why existing flags do or don't get a name at runtime.
Oh, sorry On Fri, 7 Feb 2025 at 16:10, Brendan Jackman <jackmanb@google.com> wrote: > > On Thu, 6 Feb 2025 at 05:45, Rik van Riel <riel@surriel.com> wrote: > > diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h > > index 17b6590748c0..f9b832e971c5 100644 > > --- a/arch/x86/include/asm/cpufeatures.h > > +++ b/arch/x86/include/asm/cpufeatures.h > > @@ -338,6 +338,7 @@ > > #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ > > #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ > > #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ > > +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ > > Why no "invlpgb" here? Seems like having this flag visible in cpuinfo > would be worthwhile. > > If there's a reason to hide it maybe add a comment to explain the > reason? Sorry if this is a stupid question - I also can't see an > obvious rationale for why existing flags do or don't get a name at > runtime. Oh, found it: https://lore.kernel.org/lkml/20250102120450.GNZ3aA4oVPnoJYRVUL@fat_crate.local/ Sorry for the noise, please ignore.
*I do some test on my Machine with AMD EPYC 7K83, these patches work on my host, but failed on my guest with qemu.* *in host, use lscpu cmd, you can see invlpgb in flags, but in guest no.* *So are you plan to support it in guest?* Best Regards! Thanks Rik van Riel <riel@surriel.com> 于2025年2月6日周四 12:45写道: > The CPU advertises the maximum number of pages that can be shot down > with one INVLPGB instruction in the CPUID data. > > Save that information for later use. > > Signed-off-by: Rik van Riel <riel@surriel.com> > Tested-by: Manali Shukla <Manali.Shukla@amd.com> > --- > arch/x86/Kconfig.cpu | 5 +++++ > arch/x86/include/asm/cpufeatures.h | 1 + > arch/x86/include/asm/tlbflush.h | 7 +++++++ > arch/x86/kernel/cpu/amd.c | 8 ++++++++ > 4 files changed, 21 insertions(+) > > diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu > index 2a7279d80460..abe013a1b076 100644 > --- a/arch/x86/Kconfig.cpu > +++ b/arch/x86/Kconfig.cpu > @@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES > def_bool y > depends on IA32_FEAT_CTL > > +config X86_BROADCAST_TLB_FLUSH > + def_bool y > + depends on CPU_SUP_AMD && 64BIT > + > menuconfig PROCESSOR_SELECT > bool "Supported processor vendors" if EXPERT > help > @@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 > config CPU_SUP_AMD > default y > bool "Support AMD processors" if PROCESSOR_SELECT > + select X86_BROADCAST_TLB_FLUSH > help > This enables detection, tunings and quirks for AMD processors > > diff --git a/arch/x86/include/asm/cpufeatures.h > b/arch/x86/include/asm/cpufeatures.h > index 17b6590748c0..f9b832e971c5 100644 > --- a/arch/x86/include/asm/cpufeatures.h > +++ b/arch/x86/include/asm/cpufeatures.h > @@ -338,6 +338,7 @@ > #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO > instruction */ > #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" > Instructions Retired Count */ > #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always > save/restore FP error pointers */ > +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB 
and TLBSYNC > instruction supported. */ > #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read > processor register at user level */ > #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD > instruction */ > #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch > Prediction Barrier */ > diff --git a/arch/x86/include/asm/tlbflush.h > b/arch/x86/include/asm/tlbflush.h > index 02fc2aa06e9e..8fe3b2dda507 100644 > --- a/arch/x86/include/asm/tlbflush.h > +++ b/arch/x86/include/asm/tlbflush.h > @@ -183,6 +183,13 @@ static inline void cr4_init_shadow(void) > extern unsigned long mmu_cr4_features; > extern u32 *trampoline_cr4_features; > > +/* How many pages can we invalidate with one INVLPGB. */ > +#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH > +extern u16 invlpgb_count_max; > +#else > +#define invlpgb_count_max 1 > +#endif > + > extern void initialize_tlbstate_and_flush(void); > > /* > diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c > index 79d2e17f6582..bcf73775b4f8 100644 > --- a/arch/x86/kernel/cpu/amd.c > +++ b/arch/x86/kernel/cpu/amd.c > @@ -29,6 +29,8 @@ > > #include "cpu.h" > > +u16 invlpgb_count_max __ro_after_init; > + > static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) > { > u32 gprs[8] = { 0 }; > @@ -1135,6 +1137,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 > *c) > tlb_lli_2m[ENTRIES] = eax & mask; > > tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; > + > + /* Max number of pages INVLPGB can invalidate in one shot */ > + if (boot_cpu_has(X86_FEATURE_INVLPGB)) { > + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); > + invlpgb_count_max = (edx & 0xffff) + 1; > + } > } > > static const struct cpu_dev amd_cpu_dev = { > -- > 2.47.1 > > >
On Mon, 2025-02-10 at 15:30 +0800, Vern Hao wrote: > I do some test on my Machine with AMD EPYC 7K83, these patches work > on my host, but failed on my guest with qemu. > > in host, use lscpu cmd, you can see invlpgb in flags, but in guest > no. > > So are you plan to support it in guest? How exactly did it fail in the guest? Did the guest simply not use INVLPGB because that CPUID flag was not presented in the CPUID that qemu shows to the guest, or did things crash somehow? My understanding is that while INVLPGB can work in guests, actually implementing that is a whole other can of worms, and definitely not something we should try to tackle at the same time as bare metal support. A TLB flush hypercall, with IRQ-less flushing on the hypervisor side will probably get us 90% of the way there, potentially with less overall complexity than actually supporting INVLPGB in the guest.
I have these patches in both the host and the guest, and added this patch to support the CPUID flag in KVM: diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index db3838667466..fd21d9438137 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -488,7 +488,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(CLZERO) | F(XSAVEERPTR) | + F(CLZERO) | F(XSAVEERPTR) | F(INVLPGB) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); But in the guest, using the lscpu command, I still cannot see invlpgb, so I wonder what is wrong? Best Regards! Thanks Rik van Riel <riel@surriel.com> 于2025年2月11日周二 00:50写道: > On Mon, 2025-02-10 at 15:30 +0800, Vern Hao wrote: > > I do some test on my Machine with AMD EPYC 7K83, these patches work > > on my host, but failed on my guest with qemu. > > > > in host, use lscpu cmd, you can see invlpgb in flags, but in guest > > no. > > > > So are you plan to support it in guest? > > How exactly did it fail in the guest? > > Did the guest simply not use INVLPGB because that > CPUID flag was not presented in the CPUID that > qemu shows to the guest, or did things crash somehow? > > My understanding is that while INVLPGB can work > in guests, actually implementing that is a whole > other can of worms, and definitely not something > we should try to tackle at the same time as bare > metal support. > > A TLB flush hypercall, with IRQ-less flushing on > the hypervisor side will probably get us 90% of > the way there, potentially with less overall > complexity than actually supporting INVLPGB in > the guest. > > -- > All Rights Reversed. >
On 2025/2/11 00:48, Rik van Riel wrote: > On Mon, 2025-02-10 at 15:30 +0800, Vern Hao wrote: >> I do some test on my Machine with AMD EPYC 7K83, these patches work >> on my host, but failed on my guest with qemu. >> >> in host, use lscpu cmd, you can see invlpgb in flags, but in guest >> no. >> >> So are you plan to support it in guest? > How exactly did it fail in the guest? > > Did the guest simply not use INVLPGB because that > CPUID flag was not presented in the CPUID that > qemu shows to the guest, or did things crash somehow? i support these patches in host and guest, and add this patch to support cpuid flags in kvm. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index db3838667466..fd21d9438137 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -488,7 +488,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(CLZERO) | F(XSAVEERPTR) | + F(CLZERO) | F(XSAVEERPTR) | F(INVLPGB) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); But in guest, use lscpu cmd, i still can not see invlpgb, so i just wonder where is wrong ? > > My understanding is that while INVLPGB can work > in guests, actually implementing that is a whole > other can of worms, and definitely not something > we should try to tackle at the same time as bare > metal support. > > A TLB flush hypercall, with IRQ-less flushing on > the hypervisor side will probably get us 90% of > the way there, potentially with less overall > complexity than actually supporting INVLPGB in > the guest. >
On 2/11/25 19:57, Vern Hao wrote: > > On 2025/2/11 00:48, Rik van Riel wrote: >> On Mon, 2025-02-10 at 15:30 +0800, Vern Hao wrote: >>> I do some test on my Machine with AMD EPYC 7K83, these patches work >>> on my host, but failed on my guest with qemu. >>> >>> in host, use lscpu cmd, you can see invlpgb in flags, but in guest >>> no. >>> >>> So are you plan to support it in guest? >> How exactly did it fail in the guest? >> >> Did the guest simply not use INVLPGB because that >> CPUID flag was not presented in the CPUID that >> qemu shows to the guest, or did things crash somehow? > i support these patches in host and guest, and add this patch to support > cpuid flags in kvm. > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index db3838667466..fd21d9438137 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -488,7 +488,7 @@ static inline int __do_cpuid_func(struct > kvm_cpuid_entry2 *entry, u32 function, > > /* cpuid 0x80000008.ebx */ > const u32 kvm_cpuid_8000_0008_ebx_x86_features = > - F(CLZERO) | F(XSAVEERPTR) | > + F(CLZERO) | F(XSAVEERPTR) | F(INVLPGB) | > F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | > F(VIRT_SSBD) | > F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); > > But in guest, use lscpu cmd, i still can not see invlpgb, so i just > wonder where is wrong ? Well, for one, the INVLPGB instruction has to be enabled in the VMCB in order for it to be used (unless it is an SEV-ES or SEV-SNP guest). Also, lscpu won't show "invlpgb" since the patches don't define the feature in the way that it would be visible via lscpu. You need to issue CPUID to see the bit is set or not. Also, you might need VMM support in order for that CPUID bit to be set in the guest. But, it will take hypervisor support to use INVLPGB in a non-SEV guest, since non-SEV guests do not use global ASIDs. In this case, the instruction will need to be intercepted and the hypervisor will need to determine how to process it. 
If you have an SEV guest, which uses global ASIDs and uses the same ASID for all vCPUs within the guest, you can use INVLPGB in the guest without issue and without needing to intercept the instruction. See "Guest Usage of INVLPGB" in AMD APM Vol 3 under the INVLPGB instruction documentation. Thanks, Tom > >> My understanding is that while INVLPGB can work >> in guests, actually implementing that is a whole >> other can of worms, and definitely not something >> we should try to tackle at the same time as bare >> metal support. >> >> A TLB flush hypercall, with IRQ-less flushing on >> the hypervisor side will probably get us 90% of >> the way there, potentially with less overall >> complexity than actually supporting INVLPGB in >> the guest. >>
On 2025/2/12 23:56, Tom Lendacky wrote: > On 2/11/25 19:57, Vern Hao wrote: >> On 2025/2/11 00:48, Rik van Riel wrote: >>> On Mon, 2025-02-10 at 15:30 +0800, Vern Hao wrote: >>>> I do some test on my Machine with AMD EPYC 7K83, these patches work >>>> on my host, but failed on my guest with qemu. >>>> >>>> in host, use lscpu cmd, you can see invlpgb in flags, but in guest >>>> no. >>>> >>>> So are you plan to support it in guest? >>> How exactly did it fail in the guest? >>> >>> Did the guest simply not use INVLPGB because that >>> CPUID flag was not presented in the CPUID that >>> qemu shows to the guest, or did things crash somehow? >> i support these patches in host and guest, and add this patch to support >> cpuid flags in kvm. >> >> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> index db3838667466..fd21d9438137 100644 >> --- a/arch/x86/kvm/cpuid.c >> +++ b/arch/x86/kvm/cpuid.c >> @@ -488,7 +488,7 @@ static inline int __do_cpuid_func(struct >> kvm_cpuid_entry2 *entry, u32 function, >> >> /* cpuid 0x80000008.ebx */ >> const u32 kvm_cpuid_8000_0008_ebx_x86_features = >> - F(CLZERO) | F(XSAVEERPTR) | >> + F(CLZERO) | F(XSAVEERPTR) | F(INVLPGB) | >> F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | >> F(VIRT_SSBD) | >> F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); >> >> But in guest, use lscpu cmd, i still can not see invlpgb, so i just >> wonder where is wrong ? > Well, for one, the INVLPGB instruction has to be enabled in the VMCB in > order for it to be used (unless it is an SEV-ES or SEV-SNP guest). Also, > lscpu won't show "invlpgb" since the patches don't define the feature in > the way that it would be visible via lscpu. You need to issue CPUID to > see the bit is set or not. Also, you might need VMM support in order for > that CPUID bit to be set in the guest. > > But, it will take hypervisor support to use INVLPGB in a non-SEV guest, > since non-SEV guests do not use global ASIDs. 
In this case, the > instruction will need to be intercepted and the hypervisor will need to > determine how to process it. > > If you have an SEV guest, which use global ASIDs and use the same ASID > for all vCPUs within a guest, you can use INVLPGB in the guest without > issue and without needing to intercept the instruction. > > See "Guest Usage of INVLPGB" in AMD APM Vol 3 under the INVLPGB > instruction documentation. OK, thanks a lot. Obviously, the KVM in my 5.10 kernel does not support this CPUID bit well; I tested it on an upstream kernel and it works well. > > Thanks, > Tom > >>> My understanding is that while INVLPGB can work >>> in guests, actually implementing that is a whole >>> other can of worms, and definitely not something >>> we should try to tackle at the same time as bare >>> metal support. >>> >>> A TLB flush hypercall, with IRQ-less flushing on >>> the hypervisor side will probably get us 90% of >>> the way there, potentially with less overall >>> complexity than actually supporting INVLPGB in >>> the guest. >>>
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2a7279d80460..abe013a1b076 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -395,6 +395,10 @@ config X86_VMX_FEATURE_NAMES def_bool y depends on IA32_FEAT_CTL +config X86_BROADCAST_TLB_FLUSH + def_bool y + depends on CPU_SUP_AMD && 64BIT + menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT help @@ -431,6 +435,7 @@ config CPU_SUP_CYRIX_32 config CPU_SUP_AMD default y bool "Support AMD processors" if PROCESSOR_SELECT + select X86_BROADCAST_TLB_FLUSH help This enables detection, tunings and quirks for AMD processors diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 17b6590748c0..f9b832e971c5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -338,6 +338,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instruction supported. */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 02fc2aa06e9e..8fe3b2dda507 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -183,6 +183,13 @@ static inline void cr4_init_shadow(void) extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; +/* How many pages can we invalidate with one INVLPGB. 
*/ +#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH +extern u16 invlpgb_count_max; +#else +#define invlpgb_count_max 1 +#endif + extern void initialize_tlbstate_and_flush(void); /* diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 79d2e17f6582..bcf73775b4f8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -29,6 +29,8 @@ #include "cpu.h" +u16 invlpgb_count_max __ro_after_init; + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -1135,6 +1137,12 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) tlb_lli_2m[ENTRIES] = eax & mask; tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + /* Max number of pages INVLPGB can invalidate in one shot */ + if (boot_cpu_has(X86_FEATURE_INVLPGB)) { + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); + invlpgb_count_max = (edx & 0xffff) + 1; + } } static const struct cpu_dev amd_cpu_dev = {