
[v6,13/14] KVM: x86: Introduce new KVM_FEATURE_SEV_LIVE_MIGRATION feature & Custom MSR.

Message ID 21b2119906f4cce10b4133107ece3dab0957c07c.1585548051.git.ashish.kalra@amd.com (mailing list archive)
State: New, archived
Series: Add AMD SEV guest live migration support

Commit Message

Kalra, Ashish March 30, 2020, 6:23 a.m. UTC
From: Ashish Kalra <ashish.kalra@amd.com>

Add a new KVM_FEATURE_SEV_LIVE_MIGRATION feature for the guest to check
for host-side support for SEV live migration. Also add a new custom
MSR_KVM_SEV_LIVE_MIG_EN for the guest to enable the SEV live migration
feature.

Also, ensure that the __bss_decrypted section is marked as decrypted in
the page encryption bitmap.

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 Documentation/virt/kvm/cpuid.rst     |  4 ++++
 Documentation/virt/kvm/msr.rst       | 10 ++++++++++
 arch/x86/include/asm/kvm_host.h      |  3 +++
 arch/x86/include/uapi/asm/kvm_para.h |  5 +++++
 arch/x86/kernel/kvm.c                |  4 ++++
 arch/x86/kvm/cpuid.c                 |  3 ++-
 arch/x86/kvm/svm.c                   |  5 +++++
 arch/x86/kvm/x86.c                   |  7 +++++++
 arch/x86/mm/mem_encrypt.c            | 14 +++++++++++++-
 9 files changed, 53 insertions(+), 2 deletions(-)
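
For orientation, here is the handshake this patch establishes, sketched
from the hunks below (all identifiers are taken from the patch itself;
this is an illustration, not additional code):

	/* 1. Host advertises support to the guest via CPUID (kvm/cpuid.c): */
	entry->eax |= (1 << KVM_FEATURE_SEV_LIVE_MIGRATION);

	/* 2. SEV guest opts in by writing the custom MSR (kernel/kvm.c): */
	if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION))
		wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);

	/* 3. Host services page-encryption-status hypercalls only once the
	 * guest has opted in (kvm/svm.c, svm_page_enc_status_hc()):
	 */
	if (!(vcpu->arch.msr_kvm_sev_live_migration_flag &
	      KVM_SEV_LIVE_MIGRATION_ENABLED))
		return -ENOTTY;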

Comments

Brijesh Singh March 30, 2020, 3:52 p.m. UTC | #1
On 3/30/20 1:23 AM, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
>
> Add a new KVM_FEATURE_SEV_LIVE_MIGRATION feature for the guest to check
> for host-side support for SEV live migration. Also add a new custom
> MSR_KVM_SEV_LIVE_MIG_EN for the guest to enable the SEV live migration
> feature.
>
> Also, ensure that the __bss_decrypted section is marked as decrypted in
> the page encryption bitmap.
>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>  Documentation/virt/kvm/cpuid.rst     |  4 ++++
>  Documentation/virt/kvm/msr.rst       | 10 ++++++++++
>  arch/x86/include/asm/kvm_host.h      |  3 +++
>  arch/x86/include/uapi/asm/kvm_para.h |  5 +++++
>  arch/x86/kernel/kvm.c                |  4 ++++
>  arch/x86/kvm/cpuid.c                 |  3 ++-
>  arch/x86/kvm/svm.c                   |  5 +++++
>  arch/x86/kvm/x86.c                   |  7 +++++++
>  arch/x86/mm/mem_encrypt.c            | 14 +++++++++++++-
>  9 files changed, 53 insertions(+), 2 deletions(-)


IMHO, this patch should be broken into multiple patches as it touches
the guest and the hypervisor at the same time. The first patch can
introduce the feature flag in KVM, the second patch can make the changes
specific to SVM, and the third patch can focus on how to make use of
that feature inside the guest. Additionally, invoking the hypercall to
clear the __bss_decrypted section should either be squashed into Patch
10/14 or be a separate patch itself.


> diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
> index 01b081f6e7ea..fcb191bb3016 100644
> --- a/Documentation/virt/kvm/cpuid.rst
> +++ b/Documentation/virt/kvm/cpuid.rst
> @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
>                                                before using paravirtualized
>                                                sched yield.
>  
> +KVM_FEATURE_SEV_LIVE_MIGRATION    14          guest checks this feature bit
> +                                              before enabling SEV live
> +                                              migration feature.
> +
>  KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
>                                                per-cpu warps are expeced in
>                                                kvmclock
> diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
> index 33892036672d..7cd7786bbb03 100644
> --- a/Documentation/virt/kvm/msr.rst
> +++ b/Documentation/virt/kvm/msr.rst
> @@ -319,3 +319,13 @@ data:
>  
>  	KVM guests can request the host not to poll on HLT, for example if
>  	they are performing polling themselves.
> +
> +MSR_KVM_SEV_LIVE_MIG_EN:
> +        0x4b564d06
> +
> +	Control SEV Live Migration features.
> +
> +data:
> +        Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature.
> +        Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions.
> +        All other bits are reserved.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index a96ef6338cd2..ad5faaed43c0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
>  
>  	u64 msr_kvm_poll_control;
>  
> +	/* SEV Live Migration MSR (AMD only) */
> +	u64 msr_kvm_sev_live_migration_flag;
> +
>  	/*
>  	 * Indicates the guest is trying to write a gfn that contains one or
>  	 * more of the PTEs used to translate the write itself, i.e. the access
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index 2a8e0b6b9805..d9d4953b42ad 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -31,6 +31,7 @@
>  #define KVM_FEATURE_PV_SEND_IPI	11
>  #define KVM_FEATURE_POLL_CONTROL	12
>  #define KVM_FEATURE_PV_SCHED_YIELD	13
> +#define KVM_FEATURE_SEV_LIVE_MIGRATION	14
>  
>  #define KVM_HINTS_REALTIME      0
>  
> @@ -50,6 +51,7 @@
>  #define MSR_KVM_STEAL_TIME  0x4b564d03
>  #define MSR_KVM_PV_EOI_EN      0x4b564d04
>  #define MSR_KVM_POLL_CONTROL	0x4b564d05
> +#define MSR_KVM_SEV_LIVE_MIG_EN	0x4b564d06
>  
>  struct kvm_steal_time {
>  	__u64 steal;
> @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data {
>  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>  #define KVM_PV_EOI_DISABLED 0x0
>  
> +#define KVM_SEV_LIVE_MIGRATION_ENABLED			(1 << 0)
> +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED	(1 << 1)
> +
>  #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 6efe0410fb72..8fcee0b45231 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void)
>  	if (!sev_active())
>  		return;
>  
> +	if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) {
> +		wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);
> +	}
> +
>  	for_each_possible_cpu(cpu) {
>  		__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
>  		__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index b1c469446b07..74c8b2a7270c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>  			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
>  			     (1 << KVM_FEATURE_PV_SEND_IPI) |
>  			     (1 << KVM_FEATURE_POLL_CONTROL) |
> -			     (1 << KVM_FEATURE_PV_SCHED_YIELD);
> +			     (1 << KVM_FEATURE_PV_SCHED_YIELD) |
> +			     (1 << KVM_FEATURE_SEV_LIVE_MIGRATION);


Do we want to enable this feature unconditionally? Who will clear the
feature flags for the non-SEV guest?

>  
>  		if (sched_info_on())
>  			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c99b0207a443..60ddc242a133 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>  				  unsigned long npages, unsigned long enc)
>  {
>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct kvm_vcpu *vcpu = kvm->vcpus[0];
>  	kvm_pfn_t pfn_start, pfn_end;
>  	gfn_t gfn_start, gfn_end;
>  	int ret;
> @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>  	if (!sev_guest(kvm))
>  		return -EINVAL;
>  
> +	if (!(vcpu->arch.msr_kvm_sev_live_migration_flag &
> +		KVM_SEV_LIVE_MIGRATION_ENABLED))
> +		return -ENOTTY;
> +
>  	if (!npages)
>  		return 0;
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2127ed937f53..82867b8798f8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		vcpu->arch.msr_kvm_poll_control = data;
>  		break;
>  
> +	case MSR_KVM_SEV_LIVE_MIG_EN:
> +		vcpu->arch.msr_kvm_sev_live_migration_flag = data;
> +		break;
> +
>  	case MSR_IA32_MCG_CTL:
>  	case MSR_IA32_MCG_STATUS:
>  	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
> @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_KVM_POLL_CONTROL:
>  		msr_info->data = vcpu->arch.msr_kvm_poll_control;
>  		break;
> +	case MSR_KVM_SEV_LIVE_MIG_EN:
> +		msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag;
> +		break;
>  	case MSR_IA32_P5_MC_ADDR:
>  	case MSR_IA32_P5_MC_TYPE:
>  	case MSR_IA32_MCG_CAP:
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index c9800fa811f6..f6a841494845 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void)
>  	 * With SEV, we need to make a hypercall when page encryption state is
>  	 * changed.
>  	 */
> -	if (sev_active())
> +	if (sev_active()) {
> +		unsigned long nr_pages;
> +
>  		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> +
> +		/*
> +		 * Ensure that _bss_decrypted section is marked as decrypted in the
> +		 * page encryption bitmap.
> +		 */
> +		nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
> +			PAGE_SIZE);
> +		set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
> +			nr_pages, 0);
> +	}


Isn't this too late? Shouldn't we be making the hypercall at the same
time we clear the encryption bit?


>  #endif
>  
>  	pr_info("AMD %s active\n",
Kalra, Ashish March 30, 2020, 4:42 p.m. UTC | #2
Hello Brijesh,

On Mon, Mar 30, 2020 at 10:52:16AM -0500, Brijesh Singh wrote:
> 
> On 3/30/20 1:23 AM, Ashish Kalra wrote:
> > From: Ashish Kalra <ashish.kalra@amd.com>
> >
> > Add a new KVM_FEATURE_SEV_LIVE_MIGRATION feature for the guest to check
> > for host-side support for SEV live migration. Also add a new custom
> > MSR_KVM_SEV_LIVE_MIG_EN for the guest to enable the SEV live migration
> > feature.
> >
> > Also, ensure that the __bss_decrypted section is marked as decrypted in
> > the page encryption bitmap.
> >
> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > ---
> >  Documentation/virt/kvm/cpuid.rst     |  4 ++++
> >  Documentation/virt/kvm/msr.rst       | 10 ++++++++++
> >  arch/x86/include/asm/kvm_host.h      |  3 +++
> >  arch/x86/include/uapi/asm/kvm_para.h |  5 +++++
> >  arch/x86/kernel/kvm.c                |  4 ++++
> >  arch/x86/kvm/cpuid.c                 |  3 ++-
> >  arch/x86/kvm/svm.c                   |  5 +++++
> >  arch/x86/kvm/x86.c                   |  7 +++++++
> >  arch/x86/mm/mem_encrypt.c            | 14 +++++++++++++-
> >  9 files changed, 53 insertions(+), 2 deletions(-)
> 
> 
> IMHO, this patch should be broken into multiple patches as it touches
> the guest and the hypervisor at the same time. The first patch can
> introduce the feature flag in KVM, the second patch can make the changes
> specific to SVM, and the third patch can focus on how to make use of
> that feature inside the guest. Additionally, invoking the hypercall to
> clear the __bss_decrypted section should either be squashed into Patch
> 10/14 or be a separate patch itself.
> 
> 

Ok.

I will also move the __bss_decrypted section HC to a separate patch.

> > diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
> > index 01b081f6e7ea..fcb191bb3016 100644
> > --- a/Documentation/virt/kvm/cpuid.rst
> > +++ b/Documentation/virt/kvm/cpuid.rst
> > @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
> >                                                before using paravirtualized
> >                                                sched yield.
> >  
> > +KVM_FEATURE_SEV_LIVE_MIGRATION    14          guest checks this feature bit
> > +                                              before enabling SEV live
> > +                                              migration feature.
> > +
> >  KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
> >                                                per-cpu warps are expeced in
> >                                                kvmclock
> > diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
> > index 33892036672d..7cd7786bbb03 100644
> > --- a/Documentation/virt/kvm/msr.rst
> > +++ b/Documentation/virt/kvm/msr.rst
> > @@ -319,3 +319,13 @@ data:
> >  
> >  	KVM guests can request the host not to poll on HLT, for example if
> >  	they are performing polling themselves.
> > +
> > +MSR_KVM_SEV_LIVE_MIG_EN:
> > +        0x4b564d06
> > +
> > +	Control SEV Live Migration features.
> > +
> > +data:
> > +        Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature.
> > +        Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions.
> > +        All other bits are reserved.
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index a96ef6338cd2..ad5faaed43c0 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
> >  
> >  	u64 msr_kvm_poll_control;
> >  
> > +	/* SEV Live Migration MSR (AMD only) */
> > +	u64 msr_kvm_sev_live_migration_flag;
> > +
> >  	/*
> >  	 * Indicates the guest is trying to write a gfn that contains one or
> >  	 * more of the PTEs used to translate the write itself, i.e. the access
> > diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> > index 2a8e0b6b9805..d9d4953b42ad 100644
> > --- a/arch/x86/include/uapi/asm/kvm_para.h
> > +++ b/arch/x86/include/uapi/asm/kvm_para.h
> > @@ -31,6 +31,7 @@
> >  #define KVM_FEATURE_PV_SEND_IPI	11
> >  #define KVM_FEATURE_POLL_CONTROL	12
> >  #define KVM_FEATURE_PV_SCHED_YIELD	13
> > +#define KVM_FEATURE_SEV_LIVE_MIGRATION	14
> >  
> >  #define KVM_HINTS_REALTIME      0
> >  
> > @@ -50,6 +51,7 @@
> >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> >  #define MSR_KVM_PV_EOI_EN      0x4b564d04
> >  #define MSR_KVM_POLL_CONTROL	0x4b564d05
> > +#define MSR_KVM_SEV_LIVE_MIG_EN	0x4b564d06
> >  
> >  struct kvm_steal_time {
> >  	__u64 steal;
> > @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data {
> >  #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> >  #define KVM_PV_EOI_DISABLED 0x0
> >  
> > +#define KVM_SEV_LIVE_MIGRATION_ENABLED			(1 << 0)
> > +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED	(1 << 1)
> > +
> >  #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 6efe0410fb72..8fcee0b45231 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void)
> >  	if (!sev_active())
> >  		return;
> >  
> > +	if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) {
> > +		wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);
> > +	}
> > +
> >  	for_each_possible_cpu(cpu) {
> >  		__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
> >  		__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index b1c469446b07..74c8b2a7270c 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> >  			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
> >  			     (1 << KVM_FEATURE_PV_SEND_IPI) |
> >  			     (1 << KVM_FEATURE_POLL_CONTROL) |
> > -			     (1 << KVM_FEATURE_PV_SCHED_YIELD);
> > +			     (1 << KVM_FEATURE_PV_SCHED_YIELD) |
> > +			     (1 << KVM_FEATURE_SEV_LIVE_MIGRATION);
> 
> 
> Do we want to enable this feature unconditionally? Who will clear the
> feature flags for the non-SEV guest?
>

The guest only enables/activates this feature if SEV is active.
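
A minimal sketch of the relevant guard, as wired up in the
sev_map_percpu_data() hunk of this patch:

	static void __init sev_map_percpu_data(void)
	{
		...
		if (!sev_active())
			return;		/* non-SEV guests never write the MSR */

		if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION))
			wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN,
			       KVM_SEV_LIVE_MIGRATION_ENABLED);
		...
	}

so a non-SEV guest bails out before the MSR write is ever reached.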

> >  
> >  		if (sched_info_on())
> >  			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index c99b0207a443..60ddc242a133 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> >  				  unsigned long npages, unsigned long enc)
> >  {
> >  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > +	struct kvm_vcpu *vcpu = kvm->vcpus[0];
> >  	kvm_pfn_t pfn_start, pfn_end;
> >  	gfn_t gfn_start, gfn_end;
> >  	int ret;
> > @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> >  	if (!sev_guest(kvm))
> >  		return -EINVAL;
> >  
> > +	if (!(vcpu->arch.msr_kvm_sev_live_migration_flag &
> > +		KVM_SEV_LIVE_MIGRATION_ENABLED))
> > +		return -ENOTTY;
> > +
> >  	if (!npages)
> >  		return 0;
> >  
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 2127ed937f53..82867b8798f8 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		vcpu->arch.msr_kvm_poll_control = data;
> >  		break;
> >  
> > +	case MSR_KVM_SEV_LIVE_MIG_EN:
> > +		vcpu->arch.msr_kvm_sev_live_migration_flag = data;
> > +		break;
> > +
> >  	case MSR_IA32_MCG_CTL:
> >  	case MSR_IA32_MCG_STATUS:
> >  	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
> > @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	case MSR_KVM_POLL_CONTROL:
> >  		msr_info->data = vcpu->arch.msr_kvm_poll_control;
> >  		break;
> > +	case MSR_KVM_SEV_LIVE_MIG_EN:
> > +		msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag;
> > +		break;
> >  	case MSR_IA32_P5_MC_ADDR:
> >  	case MSR_IA32_P5_MC_TYPE:
> >  	case MSR_IA32_MCG_CAP:
> > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > index c9800fa811f6..f6a841494845 100644
> > --- a/arch/x86/mm/mem_encrypt.c
> > +++ b/arch/x86/mm/mem_encrypt.c
> > @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void)
> >  	 * With SEV, we need to make a hypercall when page encryption state is
> >  	 * changed.
> >  	 */
> > -	if (sev_active())
> > +	if (sev_active()) {
> > +		unsigned long nr_pages;
> > +
> >  		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> > +
> > +		/*
> > +		 * Ensure that _bss_decrypted section is marked as decrypted in the
> > +		 * page encryption bitmap.
> > +		 */
> > +		nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
> > +			PAGE_SIZE);
> > +		set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
> > +			nr_pages, 0);
> > +	}
> 
> 
> Isn't this too late? Shouldn't we be making the hypercall at the same
> time we clear the encryption bit?
> 
>

Actually, this is being done somewhat lazily. After the guest
enables/activates the live migration feature, it should be fine to do it
here, or it can be moved into sev_map_percpu_data() where the first
hypercalls are done. In both cases the __bss_decrypted section will be
marked before the live migration process is initiated.
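
For illustration, "marking" a range boils down to one hypercall, assuming
the KVM_HC_PAGE_ENC_STATUS hypercall and the kvm_sev_hypercall3() helper
introduced earlier in this series (mark_range_decrypted() is a
hypothetical helper name, not part of the patch):

	/* Ask the host to flip npages starting at vaddr to "decrypted"
	 * (enc = 0) in its page encryption bitmap.
	 */
	static void mark_range_decrypted(unsigned long vaddr,
					 unsigned long npages)
	{
		unsigned long gpa = __pa(vaddr);	/* guest-physical address */

		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, gpa, npages, 0);
	}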

> >  #endif
> >  
> >  	pr_info("AMD %s active\n",

Thanks,
Ashish
Kalra, Ashish April 2, 2020, 11:29 p.m. UTC | #3
Hello Brijesh,
> 
> On Tue, Mar 31, 2020 at 05:13:36PM +0000, Ashish Kalra wrote:
> > Hello Brijesh,
> > 
> > > > Actually, this is being done somewhat lazily. After the guest
> > > > enables/activates the live migration feature, it should be fine to do it
> > > > here, or it can be moved into sev_map_percpu_data() where the first
> > > > hypercalls are done. In both cases the __bss_decrypted section will be
> > > > marked before the live migration process is initiated.
> > > 
> > > 
> > > IMO, it's not okay to do it here or inside sev_map_percpu_data(). So
> > > far, as soon as the C-bit state is changed in the page table, we make a
> > > hypercall. It would be a good idea to stick to that approach. I don't
> > > see any reason why we need to make an exception for __bss_decrypted
> > > unless I am missing something. What will happen if the VMM initiates
> > > the migration while the guest BIOS is booting? Are you saying it's not
> > > supported?
> > > 
> > 
> > The one thing this will require is checking for the KVM para
> > capability KVM_FEATURE_SEV_LIVE_MIGRATION as part of this code in
> > startup_64(); I need to verify if I can check for this feature so
> > early in the startup code.
> > 
> > I need to check for this capability and do the wrmsrl() here, as this
> > will be the 1st hypercall in the guest kernel, and I will need to
> > enable the live migration feature and hypercall support on the host
> > before making the hypercall.
> > 
 
I added the KVM para feature capability check here in startup_64(), and
as I thought, this does "not" work. As a side effect it also disables
the KVM paravirtualization check, so KVM paravirtualization is not
detected later during kernel boot and all KVM paravirt features remain
disabled.
 
Dug deeper into this, and here's what happens ...

kvm_para_has_feature() calls kvm_arch_para_features(), which in turn
calls kvm_cpuid_base(), and this invokes __kvm_cpuid_base(). As
"boot_cpu_data" is still not populated/set up at this point,
__kvm_cpuid_base() does not detect X86_FEATURE_HYPERVISOR and, as a side
effect, sets the static variable kvm_cpuid_base to 0.
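
For context, the detection path in question looks roughly like this
(paraphrased from arch/x86/include/asm/kvm_para.h of this kernel
vintage; the exact code may differ per tree):

	static noinline uint32_t __kvm_cpuid_base(void)
	{
		if (boot_cpu_data.cpuid_level < 0)
			return 0;	/* So we don't blow up on old processors */

		if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
			return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);

		return 0;	/* too early: boot_cpu_data not yet set up */
	}

	static inline uint32_t kvm_cpuid_base(void)
	{
		static int kvm_cpuid_base = -1;

		if (kvm_cpuid_base == -1)
			kvm_cpuid_base = __kvm_cpuid_base();	/* a cached 0 sticks */

		return kvm_cpuid_base;
	}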

So, as the KVM para feature is not detected in startup_64(), the
hypercall does not get invoked, and as a side effect of calling
kvm_para_has_feature() in startup_64(), the static variable
"kvm_cpuid_base" gets set to 0. Later, during hypervisor detection
(kvm_detect()), this setting causes kvm_detect() to return failure, and
hence the KVM paravirtualization features don't get enabled for the
guest kernel.

So, calling kvm_para_has_feature() this early in the startup_64() code
is not going to work; hence, it is probably best to do the hypercall to
mark the __bss_decrypted section as decrypted (lazily) as part of
sev_map_percpu_data(), as per my original thought.

Thanks,
Ashish
Krish Sadhukhan April 3, 2020, 11:46 p.m. UTC | #4
On 3/29/20 11:23 PM, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
>
> Add a new KVM_FEATURE_SEV_LIVE_MIGRATION feature for the guest to check
> for host-side support for SEV live migration. Also add a new custom
> MSR_KVM_SEV_LIVE_MIG_EN for the guest to enable the SEV live migration
> feature.
>
> Also, ensure that the __bss_decrypted section is marked as decrypted in
> the page encryption bitmap.
>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>   Documentation/virt/kvm/cpuid.rst     |  4 ++++
>   Documentation/virt/kvm/msr.rst       | 10 ++++++++++
>   arch/x86/include/asm/kvm_host.h      |  3 +++
>   arch/x86/include/uapi/asm/kvm_para.h |  5 +++++
>   arch/x86/kernel/kvm.c                |  4 ++++
>   arch/x86/kvm/cpuid.c                 |  3 ++-
>   arch/x86/kvm/svm.c                   |  5 +++++
>   arch/x86/kvm/x86.c                   |  7 +++++++
>   arch/x86/mm/mem_encrypt.c            | 14 +++++++++++++-
>   9 files changed, 53 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
> index 01b081f6e7ea..fcb191bb3016 100644
> --- a/Documentation/virt/kvm/cpuid.rst
> +++ b/Documentation/virt/kvm/cpuid.rst
> @@ -86,6 +86,10 @@ KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
>                                                 before using paravirtualized
>                                                 sched yield.
>   
> +KVM_FEATURE_SEV_LIVE_MIGRATION    14          guest checks this feature bit
> +                                              before enabling SEV live
> +                                              migration feature.
> +
>   KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
>                                                 per-cpu warps are expeced in
>                                                 kvmclock
> diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
> index 33892036672d..7cd7786bbb03 100644
> --- a/Documentation/virt/kvm/msr.rst
> +++ b/Documentation/virt/kvm/msr.rst
> @@ -319,3 +319,13 @@ data:
>   
>   	KVM guests can request the host not to poll on HLT, for example if
>   	they are performing polling themselves.
> +
> +MSR_KVM_SEV_LIVE_MIG_EN:
> +        0x4b564d06
> +
> +	Control SEV Live Migration features.
> +
> +data:
> +        Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature.
> +        Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions.
> +        All other bits are reserved.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index a96ef6338cd2..ad5faaed43c0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -780,6 +780,9 @@ struct kvm_vcpu_arch {
>   
>   	u64 msr_kvm_poll_control;
>   
> +	/* SEV Live Migration MSR (AMD only) */
> +	u64 msr_kvm_sev_live_migration_flag;
> +
>   	/*
>   	 * Indicates the guest is trying to write a gfn that contains one or
>   	 * more of the PTEs used to translate the write itself, i.e. the access
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index 2a8e0b6b9805..d9d4953b42ad 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -31,6 +31,7 @@
>   #define KVM_FEATURE_PV_SEND_IPI	11
>   #define KVM_FEATURE_POLL_CONTROL	12
>   #define KVM_FEATURE_PV_SCHED_YIELD	13
> +#define KVM_FEATURE_SEV_LIVE_MIGRATION	14
>   
>   #define KVM_HINTS_REALTIME      0
>   
> @@ -50,6 +51,7 @@
>   #define MSR_KVM_STEAL_TIME  0x4b564d03
>   #define MSR_KVM_PV_EOI_EN      0x4b564d04
>   #define MSR_KVM_POLL_CONTROL	0x4b564d05
> +#define MSR_KVM_SEV_LIVE_MIG_EN	0x4b564d06
>   
>   struct kvm_steal_time {
>   	__u64 steal;
> @@ -122,4 +124,7 @@ struct kvm_vcpu_pv_apf_data {
>   #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>   #define KVM_PV_EOI_DISABLED 0x0
>   
> +#define KVM_SEV_LIVE_MIGRATION_ENABLED			(1 << 0)
> +#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED	(1 << 1)
> +
>   #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 6efe0410fb72..8fcee0b45231 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -418,6 +418,10 @@ static void __init sev_map_percpu_data(void)
>   	if (!sev_active())
>   		return;
>   
> +	if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) {
> +		wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);
> +	}
> +
>   	for_each_possible_cpu(cpu) {
>   		__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
>   		__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index b1c469446b07..74c8b2a7270c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -716,7 +716,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>   			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
>   			     (1 << KVM_FEATURE_PV_SEND_IPI) |
>   			     (1 << KVM_FEATURE_POLL_CONTROL) |
> -			     (1 << KVM_FEATURE_PV_SCHED_YIELD);
> +			     (1 << KVM_FEATURE_PV_SCHED_YIELD) |
> +			     (1 << KVM_FEATURE_SEV_LIVE_MIGRATION);
>   
>   		if (sched_info_on())
>   			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c99b0207a443..60ddc242a133 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7632,6 +7632,7 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>   				  unsigned long npages, unsigned long enc)
>   {
>   	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct kvm_vcpu *vcpu = kvm->vcpus[0];
>   	kvm_pfn_t pfn_start, pfn_end;
>   	gfn_t gfn_start, gfn_end;
>   	int ret;
> @@ -7639,6 +7640,10 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>   	if (!sev_guest(kvm))
>   		return -EINVAL;
>   
> +	if (!(vcpu->arch.msr_kvm_sev_live_migration_flag &
> +		KVM_SEV_LIVE_MIGRATION_ENABLED))
> +		return -ENOTTY;
> +
>   	if (!npages)
>   		return 0;
>   
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2127ed937f53..82867b8798f8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2880,6 +2880,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		vcpu->arch.msr_kvm_poll_control = data;
>   		break;
>   
> +	case MSR_KVM_SEV_LIVE_MIG_EN:
> +		vcpu->arch.msr_kvm_sev_live_migration_flag = data;
> +		break;
> +
>   	case MSR_IA32_MCG_CTL:
>   	case MSR_IA32_MCG_STATUS:
>   	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
> @@ -3126,6 +3130,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   	case MSR_KVM_POLL_CONTROL:
>   		msr_info->data = vcpu->arch.msr_kvm_poll_control;
>   		break;
> +	case MSR_KVM_SEV_LIVE_MIG_EN:
> +		msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag;
> +		break;
>   	case MSR_IA32_P5_MC_ADDR:
>   	case MSR_IA32_P5_MC_TYPE:
>   	case MSR_IA32_MCG_CAP:
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index c9800fa811f6..f6a841494845 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -502,8 +502,20 @@ void __init mem_encrypt_init(void)
>   	 * With SEV, we need to make a hypercall when page encryption state is
>   	 * changed.
>   	 */
> -	if (sev_active())
> +	if (sev_active()) {
> +		unsigned long nr_pages;
> +
>   		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> +
> +		/*
> +		 * Ensure that _bss_decrypted section is marked as decrypted in the
> +		 * page encryption bitmap.
> +		 */
> +		nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
> +			PAGE_SIZE);
> +		set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
> +			nr_pages, 0);
> +	}
>   #endif
>   
>   	pr_info("AMD %s active\n",
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>

Patch

diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
index 01b081f6e7ea..fcb191bb3016 100644
--- a/Documentation/virt/kvm/cpuid.rst
+++ b/Documentation/virt/kvm/cpuid.rst
@@ -86,6 +86,10 @@  KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
                                               before using paravirtualized
                                               sched yield.
 
+KVM_FEATURE_SEV_LIVE_MIGRATION    14          guest checks this feature bit
+                                              before enabling SEV live
+                                              migration feature.
+
 KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
                                               per-cpu warps are expeced in
                                               kvmclock
diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
index 33892036672d..7cd7786bbb03 100644
--- a/Documentation/virt/kvm/msr.rst
+++ b/Documentation/virt/kvm/msr.rst
@@ -319,3 +319,13 @@  data:
 
 	KVM guests can request the host not to poll on HLT, for example if
 	they are performing polling themselves.
+
+MSR_KVM_SEV_LIVE_MIG_EN:
+        0x4b564d06
+
+	Control SEV Live Migration features.
+
+data:
+        Bit 0 enables (1) or disables (0) host-side SEV Live Migration feature.
+        Bit 1 enables (1) or disables (0) support for SEV Live Migration extensions.
+        All other bits are reserved.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a96ef6338cd2..ad5faaed43c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -780,6 +780,9 @@  struct kvm_vcpu_arch {
 
 	u64 msr_kvm_poll_control;
 
+	/* SEV Live Migration MSR (AMD only) */
+	u64 msr_kvm_sev_live_migration_flag;
+
 	/*
 	 * Indicates the guest is trying to write a gfn that contains one or
 	 * more of the PTEs used to translate the write itself, i.e. the access
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 2a8e0b6b9805..d9d4953b42ad 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -31,6 +31,7 @@ 
 #define KVM_FEATURE_PV_SEND_IPI	11
 #define KVM_FEATURE_POLL_CONTROL	12
 #define KVM_FEATURE_PV_SCHED_YIELD	13
+#define KVM_FEATURE_SEV_LIVE_MIGRATION	14
 
 #define KVM_HINTS_REALTIME      0
 
@@ -50,6 +51,7 @@ 
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
 #define MSR_KVM_POLL_CONTROL	0x4b564d05
+#define MSR_KVM_SEV_LIVE_MIG_EN	0x4b564d06
 
 struct kvm_steal_time {
 	__u64 steal;
@@ -122,4 +124,7 @@  struct kvm_vcpu_pv_apf_data {
 #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
 #define KVM_PV_EOI_DISABLED 0x0
 
+#define KVM_SEV_LIVE_MIGRATION_ENABLED			(1 << 0)
+#define KVM_SEV_LIVE_MIGRATION_EXTENSIONS_SUPPORTED	(1 << 1)
+
 #endif /* _UAPI_ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6efe0410fb72..8fcee0b45231 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -418,6 +418,10 @@  static void __init sev_map_percpu_data(void)
 	if (!sev_active())
 		return;
 
+	if (kvm_para_has_feature(KVM_FEATURE_SEV_LIVE_MIGRATION)) {
+		wrmsrl(MSR_KVM_SEV_LIVE_MIG_EN, KVM_SEV_LIVE_MIGRATION_ENABLED);
+	}
+
 	for_each_possible_cpu(cpu) {
 		__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
 		__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b1c469446b07..74c8b2a7270c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -716,7 +716,8 @@  static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
 			     (1 << KVM_FEATURE_PV_SEND_IPI) |
 			     (1 << KVM_FEATURE_POLL_CONTROL) |
-			     (1 << KVM_FEATURE_PV_SCHED_YIELD);
+			     (1 << KVM_FEATURE_PV_SCHED_YIELD) |
+			     (1 << KVM_FEATURE_SEV_LIVE_MIGRATION);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c99b0207a443..60ddc242a133 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7632,6 +7632,7 @@  static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
 				  unsigned long npages, unsigned long enc)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
 	kvm_pfn_t pfn_start, pfn_end;
 	gfn_t gfn_start, gfn_end;
 	int ret;
@@ -7639,6 +7640,10 @@  static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
 	if (!sev_guest(kvm))
 		return -EINVAL;
 
+	if (!(vcpu->arch.msr_kvm_sev_live_migration_flag &
+		KVM_SEV_LIVE_MIGRATION_ENABLED))
+		return -ENOTTY;
+
 	if (!npages)
 		return 0;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2127ed937f53..82867b8798f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2880,6 +2880,10 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.msr_kvm_poll_control = data;
 		break;
 
+	case MSR_KVM_SEV_LIVE_MIG_EN:
+		vcpu->arch.msr_kvm_sev_live_migration_flag = data;
+		break;
+
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
@@ -3126,6 +3130,9 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_KVM_POLL_CONTROL:
 		msr_info->data = vcpu->arch.msr_kvm_poll_control;
 		break;
+	case MSR_KVM_SEV_LIVE_MIG_EN:
+		msr_info->data = vcpu->arch.msr_kvm_sev_live_migration_flag;
+		break;
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_MCG_CAP:
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index c9800fa811f6..f6a841494845 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -502,8 +502,20 @@  void __init mem_encrypt_init(void)
 	 * With SEV, we need to make a hypercall when page encryption state is
 	 * changed.
 	 */
-	if (sev_active())
+	if (sev_active()) {
+		unsigned long nr_pages;
+
 		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
+
+		/*
+		 * Ensure that _bss_decrypted section is marked as decrypted in the
+		 * page encryption bitmap.
+		 */
+		nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
+			PAGE_SIZE);
+		set_memory_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
+			nr_pages, 0);
+	}
 #endif
 
 	pr_info("AMD %s active\n",