Message ID | 21b2119906f4cce10b4133107ece3dab0957c07c.1585548051.git.ashish.kalra@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add AMD SEV guest live migration support | expand |
On 3/30/20 1:23 AM, Ashish Kalra wrote: > From: Ashish Kalra <ashish.kalra@amd.com> > > Add new KVM_FEATURE_SEV_LIVE_MIGRATION feature for guest to check > for host-side support for SEV live migration. Also add a new custom > MSR_KVM_SEV_LIVE_MIG_EN for guest to enable the SEV live migration > feature. > > Also, ensure that _bss_decrypted section is marked as decrypted in the > page encryption bitmap. > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > --- > Documentation/virt/kvm/cpuid.rst | 4 ++++ > Documentation/virt/kvm/msr.rst | 10 ++++++++++ > arch/x86/include/asm/kvm_host.h | 3 +++ > arch/x86/include/uapi/asm/kvm_para.h | 5 +++++ > arch/x86/kernel/kvm.c | 4 ++++ > arch/x86/kvm/cpuid.c | 3 ++- > arch/x86/kvm/svm.c | 5 +++++ > arch/x86/kvm/x86.c | 7 +++++++ > arch/x86/mm/mem_encrypt.c | 14 +++++++++++++- > 9 files changed, 53 insertions(+), 2 deletions(-) IMHO, this patch should be broken into multiple patches as it touches guest, and hypervisor at the same time. The first patch can introduce the feature flag in the kvm, second patch can make the changes specific to svm, and third patch can focus on how to make use of that feature inside the guest. Additionally invoking the HC to clear the __bss_decrypted section should be either squash in Patch 10/14 or be a separate patch itself. > diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst > index 01b081f6e7ea..fcb191bb3016 100644 > --- a/Documentation/virt/kvm/cpuid.rst > +++ b/Documentation/virt/kvm/cpuid.rst > @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit > before using paravirtualized > sched yield. > > +KVM_FEATURE_SEV_LIVE_MIGRATION 14 guest checks this feature bit > + before enabling SEV live > + migration feature. 
> + > KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side > per-cpu warps are expeced in > kvmclock > diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst > index 33892036672d..7cd7786bbb03 100644 > --- a/Documentation/virt/kvm/msr.rst > +++ b/Documentation/virt/kvm/msr.rst > @@ -319,3 +319,13 @@ data: > > KVM guests can request the host not to poll on HLT, for example if > they are performing polling themselves. > + > +MSR_KVM_SEV_LIVE_MIG_EN: > + 0x4b564d06 > + > + Control SEV Live Migration features. > + > +data: > + Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature. > + Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions. > + All other bits are reserved. > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index a96ef6338cd2..ad5faaed43c0 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -780,6 +780,9 @@ struct kvm_vcpu_arch { > > u64 msr_kvm_poll_control; > > + /* SEV Live Migration MSR (AMD only) */ > + u64 msr_kvm_sev_live_migration_flag; > + > /* > * Indicates the guest is trying to write a gfn that contains one or > * more of the PTEs used to translate the write itself, i.e. 
the access > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h > index 2a8e0b6b9805..d9d4953b42ad 100644 > --- a/arch/x86/include/uapi/asm/kvm_para.h > +++ b/arch/x86/include/uapi/asm/kvm_para.h > @@ -31,6 +31,7 @@ > #define KVM_FEATURE_PV_SEND_IPI 11 > #define KVM_FEATURE_POLL_CONTROL 12 > #define KVM_FEATURE_PV_SCHED_YIELD 13 > +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14 > > #define KVM_HINTS_REALTIME 0 > > @@ -50,6 +51,7 @@ > #define MSR_KVM_STEAL_TIME 0x4b564d03 > #define MSR_KVM_PV_EOI_EN 0x4b564d04 > #define MSR_KVM_POLL_CONTROL 0x4b564d05 > +#define MSR_KVM_SEV_LIVE_MIG_EN 0x4b564d06 > > struct kvm_steal_time { > __u64 steal; > @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data { > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK > #define KVM_PV_EOI_DISABLED 0x0 > > +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0) > +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED (1 << 1) > + > #endif /* _UAPI_ASM_X86_KVM_PARA_H */ > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 6efe0410fb72..8fcee0b45231 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void) > if (!sev_active()) > return; > > + if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) { > + wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED); > + } > + > for_each_possible_cpu(cpu) { > __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); > __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index b1c469446b07..74c8b2a7270c 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, > (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | > (1 << KVM_FEATURE_PV_SEND_IPI) | > (1 << KVM_FEATURE_POLL_CONTROL) | > - (1 << KVM_FEATURE_PV_SCHED_YIELD); > + (1 << 
KVM_FEATURE_PV_SCHED_YIELD) | > + (1 << KVM_FEATURE_SEV_LIVE_MIGRATION); Do we want to enable this feature unconditionally ? Who will clear the feature flags for the non-SEV guest ? > > if (sched_info_on()) > entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index c99b0207a443..60ddc242a133 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > unsigned long npages, unsigned long enc) > { > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + struct kvm_vcpu *vcpu = kvm->vcpus[0]; > kvm_pfn_t pfn_start, pfn_end; > gfn_t gfn_start, gfn_end; > int ret; > @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > if (!sev_guest(kvm)) > return -EINVAL; > > + if (!(vcpu->arch.msr_kvm_sev_live_migration_flag & > + KVM_SEV_LIVE_MIGRATION_ENABLED)) > + return -ENOTTY; > + > if (!npages) > return 0; > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 2127ed937f53..82867b8798f8 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > vcpu->arch.msr_kvm_poll_control = data; > break; > > + case MSR_KVM_SEV_LIVE_MIG_EN: > + vcpu->arch.msr_kvm_sev_live_migration_flag = data; > + break; > + > case MSR_IA32_MCG_CTL: > case MSR_IA32_MCG_STATUS: > case MSR_IA32_MC0_CTL ... 
MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: > @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > case MSR_KVM_POLL_CONTROL: > msr_info->data = vcpu->arch.msr_kvm_poll_control; > break; > + case MSR_KVM_SEV_LIVE_MIG_EN: > + msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag; > + break; > case MSR_IA32_P5_MC_ADDR: > case MSR_IA32_P5_MC_TYPE: > case MSR_IA32_MCG_CAP: > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index c9800fa811f6..f6a841494845 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void) > * With SEV, we need to make a hypercall when page encryption state is > * changed. > */ > - if (sev_active()) > + if (sev_active()) { > + unsigned long nr_pages; > + > pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; > + > + /* > + * Ensure that _bss_decrypted section is marked as decrypted in the > + * page encryption bitmap. > + */ > + nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted, > + PAGE_SIZE); > + set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted, > + nr_pages, 0); > + } Isn't this too late, should we be making hypercall at the same time we clear the encryption bit ? > #endif > > pr_info("AMD %s active\n",
Hello Brijesh, On Mon, Mar 30, 2020 at 10:52:16AM -0500, Brijesh Singh wrote: > > On 3/30/20 1:23 AM, Ashish Kalra wrote: > > From: Ashish Kalra <ashish.kalra@amd.com> > > > > Add new KVM_FEATURE_SEV_LIVE_MIGRATION feature for guest to check > > for host-side support for SEV live migration. Also add a new custom > > MSR_KVM_SEV_LIVE_MIG_EN for guest to enable the SEV live migration > > feature. > > > > Also, ensure that _bss_decrypted section is marked as decrypted in the > > page encryption bitmap. > > > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > > --- > > Documentation/virt/kvm/cpuid.rst | 4 ++++ > > Documentation/virt/kvm/msr.rst | 10 ++++++++++ > > arch/x86/include/asm/kvm_host.h | 3 +++ > > arch/x86/include/uapi/asm/kvm_para.h | 5 +++++ > > arch/x86/kernel/kvm.c | 4 ++++ > > arch/x86/kvm/cpuid.c | 3 ++- > > arch/x86/kvm/svm.c | 5 +++++ > > arch/x86/kvm/x86.c | 7 +++++++ > > arch/x86/mm/mem_encrypt.c | 14 +++++++++++++- > > 9 files changed, 53 insertions(+), 2 deletions(-) > > > IMHO, this patch should be broken into multiple patches as it touches > guest, and hypervisor at the same time. The first patch can introduce > the feature flag in the kvm, second patch can make the changes specific > to svm, and third patch can focus on how to make use of that feature > inside the guest. Additionally invoking the HC to clear the > __bss_decrypted section should be either squash in Patch 10/14 or be a > separate patch itself. > > Ok. I will also move the __bss_decrypted section HC to a separate patch. > > diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst > > index 01b081f6e7ea..fcb191bb3016 100644 > > --- a/Documentation/virt/kvm/cpuid.rst > > +++ b/Documentation/virt/kvm/cpuid.rst > > @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit > > before using paravirtualized > > sched yield. 
> > > > +KVM_FEATURE_SEV_LIVE_MIGRATION 14 guest checks this feature bit > > + before enabling SEV live > > + migration feature. > > + > > KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side > > per-cpu warps are expeced in > > kvmclock > > diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst > > index 33892036672d..7cd7786bbb03 100644 > > --- a/Documentation/virt/kvm/msr.rst > > +++ b/Documentation/virt/kvm/msr.rst > > @@ -319,3 +319,13 @@ data: > > > > KVM guests can request the host not to poll on HLT, for example if > > they are performing polling themselves. > > + > > +MSR_KVM_SEV_LIVE_MIG_EN: > > + 0x4b564d06 > > + > > + Control SEV Live Migration features. > > + > > +data: > > + Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature. > > + Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions. > > + All other bits are reserved. > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index a96ef6338cd2..ad5faaed43c0 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -780,6 +780,9 @@ struct kvm_vcpu_arch { > > > > u64 msr_kvm_poll_control; > > > > + /* SEV Live Migration MSR (AMD only) */ > > + u64 msr_kvm_sev_live_migration_flag; > > + > > /* > > * Indicates the guest is trying to write a gfn that contains one or > > * more of the PTEs used to translate the write itself, i.e. 
the access > > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h > > index 2a8e0b6b9805..d9d4953b42ad 100644 > > --- a/arch/x86/include/uapi/asm/kvm_para.h > > +++ b/arch/x86/include/uapi/asm/kvm_para.h > > @@ -31,6 +31,7 @@ > > #define KVM_FEATURE_PV_SEND_IPI 11 > > #define KVM_FEATURE_POLL_CONTROL 12 > > #define KVM_FEATURE_PV_SCHED_YIELD 13 > > +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14 > > > > #define KVM_HINTS_REALTIME 0 > > > > @@ -50,6 +51,7 @@ > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > #define MSR_KVM_PV_EOI_EN 0x4b564d04 > > #define MSR_KVM_POLL_CONTROL 0x4b564d05 > > +#define MSR_KVM_SEV_LIVE_MIG_EN 0x4b564d06 > > > > struct kvm_steal_time { > > __u64 steal; > > @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data { > > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK > > #define KVM_PV_EOI_DISABLED 0x0 > > > > +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0) > > +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED (1 << 1) > > + > > #endif /* _UAPI_ASM_X86_KVM_PARA_H */ > > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > > index 6efe0410fb72..8fcee0b45231 100644 > > --- a/arch/x86/kernel/kvm.c > > +++ b/arch/x86/kernel/kvm.c > > @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void) > > if (!sev_active()) > > return; > > > > + if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) { > > + wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED); > > + } > > + > > for_each_possible_cpu(cpu) { > > __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); > > __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index b1c469446b07..74c8b2a7270c 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, > > (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | > > (1 << KVM_FEATURE_PV_SEND_IPI) | > > (1 << 
KVM_FEATURE_POLL_CONTROL) | > > - (1 << KVM_FEATURE_PV_SCHED_YIELD); > > + (1 << KVM_FEATURE_PV_SCHED_YIELD) | > > + (1 << KVM_FEATURE_SEV_LIVE_MIGRATION); > > > Do we want to enable this feature unconditionally ? Who will clear the > feature flags for the non-SEV guest ? > The guest only enables/activates this feature if sev is active. > > > > if (sched_info_on()) > > entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > index c99b0207a443..60ddc242a133 100644 > > --- a/arch/x86/kvm/svm.c > > +++ b/arch/x86/kvm/svm.c > > @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > unsigned long npages, unsigned long enc) > > { > > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > > + struct kvm_vcpu *vcpu = kvm->vcpus[0]; > > kvm_pfn_t pfn_start, pfn_end; > > gfn_t gfn_start, gfn_end; > > int ret; > > @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > if (!sev_guest(kvm)) > > return -EINVAL; > > > > + if (!(vcpu->arch.msr_kvm_sev_live_migration_flag & > > + KVM_SEV_LIVE_MIGRATION_ENABLED)) > > + return -ENOTTY; > > + > > if (!npages) > > return 0; > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 2127ed937f53..82867b8798f8 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > vcpu->arch.msr_kvm_poll_control = data; > > break; > > > > + case MSR_KVM_SEV_LIVE_MIG_EN: > > + vcpu->arch.msr_kvm_sev_live_migration_flag = data; > > + break; > > + > > case MSR_IA32_MCG_CTL: > > case MSR_IA32_MCG_STATUS: > > case MSR_IA32_MC0_CTL ... 
MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: > > @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > case MSR_KVM_POLL_CONTROL: > > msr_info->data = vcpu->arch.msr_kvm_poll_control; > > break; > > + case MSR_KVM_SEV_LIVE_MIG_EN: > > + msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag; > > + break; > > case MSR_IA32_P5_MC_ADDR: > > case MSR_IA32_P5_MC_TYPE: > > case MSR_IA32_MCG_CAP: > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > > index c9800fa811f6..f6a841494845 100644 > > --- a/arch/x86/mm/mem_encrypt.c > > +++ b/arch/x86/mm/mem_encrypt.c > > @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void) > > * With SEV, we need to make a hypercall when page encryption state is > > * changed. > > */ > > - if (sev_active()) > > + if (sev_active()) { > > + unsigned long nr_pages; > > + > > pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; > > + > > + /* > > + * Ensure that _bss_decrypted section is marked as decrypted in the > > + * page encryption bitmap. > > + */ > > + nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted, > > + PAGE_SIZE); > > + set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted, > > + nr_pages, 0); > > + } > > > Isn't this too late, should we be making hypercall at the same time we > clear the encryption bit ? > > Actually this is being done somewhat lazily, after the guest enables/activates the live migration feature, it should be fine to do it here or it can be moved into sev_map_percpu_data() where the first hypercalls are done, in both cases the __bss_decrypted section will be marked before the live migration process is initiated. > > #endif > > > > pr_info("AMD %s active\n", Thanks, Ashish
Hello Brijesh, > > On Tue, Mar 31, 2020 at 05:13:36PM +0000, Ashish Kalra wrote: > > Hello Brijesh, > > > > > > Actually this is being done somewhat lazily, after the guest > > > > enables/activates the live migration feature, it should be fine to do it > > > > here or it can be moved into sev_map_percpu_data() where the first > > > > hypercalls are done, in both cases the __bss_decrypted section will be > > > > marked before the live migration process is initiated. > > > > > > > > > IMO, its not okay to do it here or inside sev_map_percpu_data(). So far, > > > as soon as C-bit state is changed in page table we make a hypercall. It > > > will be good idea to stick to that approach. I don't see any reason why > > > we need to make an exception for the __bss_decrypted unless I am missing > > > something. What will happen if VMM initiate the migration while guest > > > BIOS is booting? Are you saying its not supported ? > > > > > > > The one thing this will require is checking for KVM para capability > > KVM_FEATURE_SEV_LIVE_MIGRATION as part of this code in startup_64(), i > > need to verify if i can check for this feature so early in startup code. > > > > I need to check for this capability and do the wrmsrl() here as this > > will be the 1st hypercall in the guest kernel and i will need to > > enable live migration feature and hypercall support on the host > > before making the hypercall. > > I added the KVM para feature capability check here in startup_64(), and as i thought this does "not" work and also as a side effect disables the KVM paravirtualization check and so KVM paravirtualization is not detected later during kernel boot and all KVM paravirt features remain disabled. Dug deeper into this and here's what happens ... kvm_para_has_feature() calls kvm_arch_para_feature() which in turn calls kvm_cpuid_base() and this invokes __kvm_cpuid_base(). 
As the "boot_cpu_data" is still not populated/set up, __kvm_cpuid_base() does not detect X86_FEATURE_HYPERVISOR and also, as a side effect, sets the variable kvm_cpuid_base == 0. So, as kvm_para_has_feature() does not detect the feature in startup_64(), the hypercall does not get invoked, and also, as a side effect of calling kvm_para_has_feature() in startup_64(), the static variable "kvm_cpuid_base" gets set to 0; later, during hypervisor detection (kvm_detect), this variable's setting causes kvm_detect() to return failure and hence KVM paravirtualization features don't get enabled for the guest kernel. So, calling kvm_para_has_feature() so early in startup_64() code is not going to work; hence, it is probably best to do the hypercall to mark the __bss_decrypted section as decrypted (lazily) as part of sev_map_percpu_data(), as per my original thought. Thanks, Ashish
On 3/29/20 11:23 PM, Ashish Kalra wrote: > From: Ashish Kalra <ashish.kalra@amd.com> > > Add new KVM_FEATURE_SEV_LIVE_MIGRATION feature for guest to check > for host-side support for SEV live migration. Also add a new custom > MSR_KVM_SEV_LIVE_MIG_EN for guest to enable the SEV live migration > feature. > > Also, ensure that _bss_decrypted section is marked as decrypted in the > page encryption bitmap. > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > --- > Documentation/virt/kvm/cpuid.rst | 4 ++++ > Documentation/virt/kvm/msr.rst | 10 ++++++++++ > arch/x86/include/asm/kvm_host.h | 3 +++ > arch/x86/include/uapi/asm/kvm_para.h | 5 +++++ > arch/x86/kernel/kvm.c | 4 ++++ > arch/x86/kvm/cpuid.c | 3 ++- > arch/x86/kvm/svm.c | 5 +++++ > arch/x86/kvm/x86.c | 7 +++++++ > arch/x86/mm/mem_encrypt.c | 14 +++++++++++++- > 9 files changed, 53 insertions(+), 2 deletions(-) > > diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst > index 01b081f6e7ea..fcb191bb3016 100644 > --- a/Documentation/virt/kvm/cpuid.rst > +++ b/Documentation/virt/kvm/cpuid.rst > @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit > before using paravirtualized > sched yield. > > +KVM_FEATURE_SEV_LIVE_MIGRATION 14 guest checks this feature bit > + before enabling SEV live > + migration feature. > + > KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side > per-cpu warps are expeced in > kvmclock > diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst > index 33892036672d..7cd7786bbb03 100644 > --- a/Documentation/virt/kvm/msr.rst > +++ b/Documentation/virt/kvm/msr.rst > @@ -319,3 +319,13 @@ data: > > KVM guests can request the host not to poll on HLT, for example if > they are performing polling themselves. > + > +MSR_KVM_SEV_LIVE_MIG_EN: > + 0x4b564d06 > + > + Control SEV Live Migration features. > + > +data: > + Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature. 
> + Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions. > + All other bits are reserved. > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index a96ef6338cd2..ad5faaed43c0 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -780,6 +780,9 @@ struct kvm_vcpu_arch { > > u64 msr_kvm_poll_control; > > + /* SEV Live Migration MSR (AMD only) */ > + u64 msr_kvm_sev_live_migration_flag; > + > /* > * Indicates the guest is trying to write a gfn that contains one or > * more of the PTEs used to translate the write itself, i.e. the access > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h > index 2a8e0b6b9805..d9d4953b42ad 100644 > --- a/arch/x86/include/uapi/asm/kvm_para.h > +++ b/arch/x86/include/uapi/asm/kvm_para.h > @@ -31,6 +31,7 @@ > #define KVM_FEATURE_PV_SEND_IPI 11 > #define KVM_FEATURE_POLL_CONTROL 12 > #define KVM_FEATURE_PV_SCHED_YIELD 13 > +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14 > > #define KVM_HINTS_REALTIME 0 > > @@ -50,6 +51,7 @@ > #define MSR_KVM_STEAL_TIME 0x4b564d03 > #define MSR_KVM_PV_EOI_EN 0x4b564d04 > #define MSR_KVM_POLL_CONTROL 0x4b564d05 > +#define MSR_KVM_SEV_LIVE_MIG_EN 0x4b564d06 > > struct kvm_steal_time { > __u64 steal; > @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data { > #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK > #define KVM_PV_EOI_DISABLED 0x0 > > +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0) > +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED (1 << 1) > + > #endif /* _UAPI_ASM_X86_KVM_PARA_H */ > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 6efe0410fb72..8fcee0b45231 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void) > if (!sev_active()) > return; > > + if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) { > + wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED); > + } > + > 
for_each_possible_cpu(cpu) { > __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); > __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index b1c469446b07..74c8b2a7270c 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, > (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | > (1 << KVM_FEATURE_PV_SEND_IPI) | > (1 << KVM_FEATURE_POLL_CONTROL) | > - (1 << KVM_FEATURE_PV_SCHED_YIELD); > + (1 << KVM_FEATURE_PV_SCHED_YIELD) | > + (1 << KVM_FEATURE_SEV_LIVE_MIGRATION); > > if (sched_info_on()) > entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index c99b0207a443..60ddc242a133 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > unsigned long npages, unsigned long enc) > { > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + struct kvm_vcpu *vcpu = kvm->vcpus[0]; > kvm_pfn_t pfn_start, pfn_end; > gfn_t gfn_start, gfn_end; > int ret; > @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > if (!sev_guest(kvm)) > return -EINVAL; > > + if (!(vcpu->arch.msr_kvm_sev_live_migration_flag & > + KVM_SEV_LIVE_MIGRATION_ENABLED)) > + return -ENOTTY; > + > if (!npages) > return 0; > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 2127ed937f53..82867b8798f8 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > vcpu->arch.msr_kvm_poll_control = data; > break; > > + case MSR_KVM_SEV_LIVE_MIG_EN: > + vcpu->arch.msr_kvm_sev_live_migration_flag = data; > + break; > + > case MSR_IA32_MCG_CTL: > case MSR_IA32_MCG_STATUS: > case MSR_IA32_MC0_CTL ... 
MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: > @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > case MSR_KVM_POLL_CONTROL: > msr_info->data = vcpu->arch.msr_kvm_poll_control; > break; > + case MSR_KVM_SEV_LIVE_MIG_EN: > + msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag; > + break; > case MSR_IA32_P5_MC_ADDR: > case MSR_IA32_P5_MC_TYPE: > case MSR_IA32_MCG_CAP: > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index c9800fa811f6..f6a841494845 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void) > * With SEV, we need to make a hypercall when page encryption state is > * changed. > */ > - if (sev_active()) > + if (sev_active()) { > + unsigned long nr_pages; > + > pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; > + > + /* > + * Ensure that _bss_decrypted section is marked as decrypted in the > + * page encryption bitmap. > + */ > + nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted, > + PAGE_SIZE); > + set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted, > + nr_pages, 0); > + } > #endif > > pr_info("AMD %s active\n", Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst index 01b081f6e7ea..fcb191bb3016 100644 --- a/Documentation/virt/kvm/cpuid.rst +++ b/Documentation/virt/kvm/cpuid.rst @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit before using paravirtualized sched yield. +KVM_FEATURE_SEV_LIVE_MIGRATION 14 guest checks this feature bit + before enabling SEV live + migration feature. + KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side per-cpu warps are expeced in kvmclock diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst index 33892036672d..7cd7786bbb03 100644 --- a/Documentation/virt/kvm/msr.rst +++ b/Documentation/virt/kvm/msr.rst @@ -319,3 +319,13 @@ data: KVM guests can request the host not to poll on HLT, for example if they are performing polling themselves. + +MSR_KVM_SEV_LIVE_MIG_EN: + 0x4b564d06 + + Control SEV Live Migration features. + +data: + Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature. + Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions. + All other bits are reserved. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a96ef6338cd2..ad5faaed43c0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -780,6 +780,9 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; + /* SEV Live Migration MSR (AMD only) */ + u64 msr_kvm_sev_live_migration_flag; + /* * Indicates the guest is trying to write a gfn that contains one or * more of the PTEs used to translate the write itself, i.e. 
the access diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 2a8e0b6b9805..d9d4953b42ad 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -31,6 +31,7 @@ #define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_SEV_LIVE_MIGRATION 14 #define KVM_HINTS_REALTIME 0 @@ -50,6 +51,7 @@ #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 #define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_SEV_LIVE_MIG_EN 0x4b564d06 struct kvm_steal_time { __u64 steal; @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data { #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK #define KVM_PV_EOI_DISABLED 0x0 +#define KVM_SEV_LIVE_MIGRATION_ENABLED (1 << 0) +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED (1 << 1) + #endif /* _UAPI_ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6efe0410fb72..8fcee0b45231 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void) if (!sev_active()) return; + if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) { + wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED); + } + for_each_possible_cpu(cpu) { __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b1c469446b07..74c8b2a7270c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | (1 << KVM_FEATURE_PV_SEND_IPI) | (1 << KVM_FEATURE_POLL_CONTROL) | - (1 << KVM_FEATURE_PV_SCHED_YIELD); + (1 << KVM_FEATURE_PV_SCHED_YIELD) | + (1 << KVM_FEATURE_SEV_LIVE_MIGRATION); if (sched_info_on()) entry->eax |= (1 << 
KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c99b0207a443..60ddc242a133 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, unsigned long npages, unsigned long enc) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_vcpu *vcpu = kvm->vcpus[0]; kvm_pfn_t pfn_start, pfn_end; gfn_t gfn_start, gfn_end; int ret; @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, if (!sev_guest(kvm)) return -EINVAL; + if (!(vcpu->arch.msr_kvm_sev_live_migration_flag & + KVM_SEV_LIVE_MIGRATION_ENABLED)) + return -ENOTTY; + if (!npages) return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2127ed937f53..82867b8798f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.msr_kvm_poll_control = data; break; + case MSR_KVM_SEV_LIVE_MIG_EN: + vcpu->arch.msr_kvm_sev_live_migration_flag = data; + break; + case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_POLL_CONTROL: msr_info->data = vcpu->arch.msr_kvm_poll_control; break; + case MSR_KVM_SEV_LIVE_MIG_EN: + msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag; + break; case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_TYPE: case MSR_IA32_MCG_CAP: diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index c9800fa811f6..f6a841494845 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void) * With SEV, we need to make a hypercall when page encryption state is * changed. 
*/ - if (sev_active()) + if (sev_active()) { + unsigned long nr_pages; + pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; + + /* + * Ensure that _bss_decrypted section is marked as decrypted in the + * page encryption bitmap. + */ + nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted, + PAGE_SIZE); + set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted, + nr_pages, 0); + } #endif pr_info("AMD %s active\n",