Message ID | 1860502863.219296.1710395908135.JavaMail.zimbra@sjtu.edu.cn (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] KVM:SVM: Flush cache only on CPUs running SEV guest | expand |
On 3/14/24 00:58, Zheyun Shen wrote: > On AMD CPUs without ensuring cache consistency, each memory page > reclamation in an SEV guest triggers a call to wbinvd_on_all_cpus(), > thereby affecting the performance of other programs on the host. > > Typically, an AMD server may have 128 cores or more, while the SEV guest > might only utilize 8 of these cores. Meanwhile, host can use qemu-affinity > to bind these 8 vCPUs to specific physical CPUs. > > Therefore, keeping a record of the physical core numbers each time a vCPU > runs can help avoid flushing the cache for all CPUs every time. > > Since the usage of sev_flush_asids() isn't tied to a single VM, we just > replace all wbinvd_on_all_cpus() with sev_do_wbinvd() except for that > in sev_flush_asids(). > > Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn> Ran this through our (somewhat limited) CI and also on older hardware. Nothing bad happened, so: Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Note, the patch itself seems white-space damaged - what should be tabs are all spaces. > --- > v2 -> v3: > - Replaced get_cpu() with parameter cpu in pre_sev_run(). > > v1 -> v2: > - Added sev_do_wbinvd() to wrap two operations. > - Used cpumask_test_and_clear_cpu() to avoid concurrent problems. 
> --- > arch/x86/kvm/svm/sev.c | 27 +++++++++++++++++++++++---- > arch/x86/kvm/svm/svm.h | 3 +++ > 2 files changed, 26 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index f760106c3..743931e33 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -215,6 +215,24 @@ static void sev_asid_free(struct kvm_sev_info *sev) > sev->misc_cg = NULL; > } > > +static struct cpumask *sev_get_wbinvd_dirty_mask(struct kvm *kvm) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + > + return &sev->wbinvd_dirty_mask; > +} > + > +static void sev_do_wbinvd(struct kvm *kvm) > +{ > + int cpu; > + struct cpumask *dirty_mask = sev_get_wbinvd_dirty_mask(kvm); > + > + for_each_possible_cpu(cpu) { > + if (cpumask_test_and_clear_cpu(cpu, dirty_mask)) > + wbinvd_on_cpu(cpu); > + } > +} > + > static void sev_decommission(unsigned int handle) > { > struct sev_data_decommission decommission; > @@ -2048,7 +2066,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm, > * releasing the pages back to the system for use. CLFLUSH will > * not do this, so issue a WBINVD. > */ > - wbinvd_on_all_cpus(); > + sev_do_wbinvd(kvm); > > __unregister_enc_region_locked(kvm, region); > > @@ -2152,7 +2170,7 @@ void sev_vm_destroy(struct kvm *kvm) > * releasing the pages back to the system for use. CLFLUSH will > * not do this, so issue a WBINVD. 
> */ > - wbinvd_on_all_cpus(); > + sev_do_wbinvd(kvm); > > /* > * if userspace was terminated before unregistering the memory regions > @@ -2343,7 +2361,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) > return; > > do_wbinvd: > - wbinvd_on_all_cpus(); > + sev_do_wbinvd(vcpu->kvm); > } > > void sev_guest_memory_reclaimed(struct kvm *kvm) > @@ -2351,7 +2369,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm) > if (!sev_guest(kvm)) > return; > > - wbinvd_on_all_cpus(); > + sev_do_wbinvd(kvm); > } > > void sev_free_vcpu(struct kvm_vcpu *vcpu) > @@ -2648,6 +2666,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) > sd->sev_vmcbs[asid] = svm->vmcb; > svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; > vmcb_mark_dirty(svm->vmcb, VMCB_ASID); > + cpumask_set_cpu(cpu, sev_get_wbinvd_dirty_mask(svm->vcpu.kvm)); > } > > #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index 8ef95139c..de240a9e9 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -90,6 +90,9 @@ struct kvm_sev_info { > struct list_head mirror_entry; /* Use as a list entry of mirrors */ > struct misc_cg *misc_cg; /* For misc cgroup accounting */ > atomic_t migration_in_progress; > + > + /* CPUs invoked VMRUN should do wbinvd after guest memory is reclaimed */ > + struct cpumask wbinvd_dirty_mask; > }; > > struct kvm_svm { > -- > 2.34.1
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index f760106c3..743931e33 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -215,6 +215,24 @@ static void sev_asid_free(struct kvm_sev_info *sev) sev->misc_cg = NULL; } +static struct cpumask *sev_get_wbinvd_dirty_mask(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + + return &sev->wbinvd_dirty_mask; +} + +static void sev_do_wbinvd(struct kvm *kvm) +{ + int cpu; + struct cpumask *dirty_mask = sev_get_wbinvd_dirty_mask(kvm); + + for_each_possible_cpu(cpu) { + if (cpumask_test_and_clear_cpu(cpu, dirty_mask)) + wbinvd_on_cpu(cpu); + } +} + static void sev_decommission(unsigned int handle) { struct sev_data_decommission decommission; @@ -2048,7 +2066,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm, * releasing the pages back to the system for use. CLFLUSH will * not do this, so issue a WBINVD. */ - wbinvd_on_all_cpus(); + sev_do_wbinvd(kvm); __unregister_enc_region_locked(kvm, region); @@ -2152,7 +2170,7 @@ void sev_vm_destroy(struct kvm *kvm) * releasing the pages back to the system for use. CLFLUSH will * not do this, so issue a WBINVD. 
*/ - wbinvd_on_all_cpus(); + sev_do_wbinvd(kvm); /* * if userspace was terminated before unregistering the memory regions @@ -2343,7 +2361,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) return; do_wbinvd: - wbinvd_on_all_cpus(); + sev_do_wbinvd(vcpu->kvm); } void sev_guest_memory_reclaimed(struct kvm *kvm) @@ -2351,7 +2369,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm) if (!sev_guest(kvm)) return; - wbinvd_on_all_cpus(); + sev_do_wbinvd(kvm); } void sev_free_vcpu(struct kvm_vcpu *vcpu) @@ -2648,6 +2666,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) sd->sev_vmcbs[asid] = svm->vmcb; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; vmcb_mark_dirty(svm->vmcb, VMCB_ASID); + cpumask_set_cpu(cpu, sev_get_wbinvd_dirty_mask(svm->vcpu.kvm)); } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 8ef95139c..de240a9e9 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -90,6 +90,9 @@ struct kvm_sev_info { struct list_head mirror_entry; /* Use as a list entry of mirrors */ struct misc_cg *misc_cg; /* For misc cgroup accounting */ atomic_t migration_in_progress; + + /* CPUs invoked VMRUN should do wbinvd after guest memory is reclaimed */ + struct cpumask wbinvd_dirty_mask; }; struct kvm_svm {
On AMD CPUs that do not ensure cache consistency, each memory page reclamation in an SEV guest triggers a call to wbinvd_on_all_cpus(), thereby affecting the performance of other programs on the host. Typically, an AMD server may have 128 cores or more, while the SEV guest might only utilize 8 of these cores. Meanwhile, the host can use qemu-affinity to bind these 8 vCPUs to specific physical CPUs. Therefore, keeping a record of the physical core numbers each time a vCPU runs can help avoid flushing the cache for all CPUs every time. Since the usage of sev_flush_asids() isn't tied to a single VM, we just replace all wbinvd_on_all_cpus() calls with sev_do_wbinvd(), except for the one in sev_flush_asids(). Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn> --- v2 -> v3: - Replaced get_cpu() with the cpu parameter in pre_sev_run(). v1 -> v2: - Added sev_do_wbinvd() to wrap two operations. - Used cpumask_test_and_clear_cpu() to avoid concurrency problems. --- arch/x86/kvm/svm/sev.c | 27 +++++++++++++++++++++++---- arch/x86/kvm/svm/svm.h | 3 +++ 2 files changed, 26 insertions(+), 4 deletions(-) -- 2.34.1