Message ID | 20210707183616.5620-24-brijesh.singh@amd.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Herbert Xu |
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand |
> > +static int snp_decommission_context(struct kvm *kvm) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + struct sev_data_snp_decommission data = {}; > + int ret; > + > + /* If context is not created then do nothing */ > + if (!sev->snp_context) > + return 0; > + > + data.gctx_paddr = __sme_pa(sev->snp_context); > + ret = snp_guest_decommission(&data, NULL); > + if (ret) > + return ret; Should we WARN or pr_err here? I see in the case of snp_launch_start's e_free_context we do not warn the user they have leaked a firmware page. > > + > + /* free the context page now */ > + snp_free_firmware_page(sev->snp_context); > + sev->snp_context = NULL; > + > + return 0; > +} > + > void sev_vm_destroy(struct kvm *kvm) > { > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm) > > mutex_unlock(&kvm->lock); > > - sev_unbind_asid(kvm, sev->handle); > + if (sev_snp_guest(kvm)) { > + if (snp_decommission_context(kvm)) { > + pr_err("Failed to free SNP guest context, leaking asid!\n"); Should these errors be a WARN since we are leaking some state? > + return; > + } > + } else { > + sev_unbind_asid(kvm, sev->handle); > + } > + > sev_asid_free(sev); > } >
On Wed, Jul 07, 2021, Brijesh Singh wrote: > @@ -1527,6 +1530,100 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) > return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); > } > > +static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp) > +{ > + struct sev_data_snp_gctx_create data = {}; > + void *context; > + int rc; > + > + /* Allocate memory for context page */ Eh, I'd drop this comment. It's quite obvious that a page is being allocated and that it's being assigned to the context. > + context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); > + if (!context) > + return NULL; > + > + data.gctx_paddr = __psp_pa(context); > + rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); > + if (rc) { > + snp_free_firmware_page(context); > + return NULL; > + } > + > + return context; > +} > + > +static int snp_bind_asid(struct kvm *kvm, int *error) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + struct sev_data_snp_activate data = {}; > + int asid = sev_get_asid(kvm); > + int ret, retry_count = 0; > + > + /* Activate ASID on the given context */ > + data.gctx_paddr = __psp_pa(sev->snp_context); > + data.asid = asid; > +again: > + ret = sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error); > + > + /* Check if the DF_FLUSH is required, and try again */ Please provide more info on why this may be necessary. I can see from the code that it does a flush and retries, but I have no idea why a flush would be required in the first place, e.g. why can't KVM guarantee that everything is in the proper state before attempting to bind an ASID? > + if (ret && (*error == SEV_RET_DFFLUSH_REQUIRED) && (!retry_count)) { > + /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ > + down_read(&sev_deactivate_lock); > + wbinvd_on_all_cpus(); > + ret = snp_guest_df_flush(error); > + up_read(&sev_deactivate_lock); > + > + if (ret) > + return ret; > + > + /* only one retry */ Again, please explain why. Is this arbitrary? Is retrying more than once guaranteed to be useless? > + retry_count = 1; > + > + goto again; > + } > + > + return ret; > +} ... > void sev_vm_destroy(struct kvm *kvm) > { > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm) > > mutex_unlock(&kvm->lock); > > - sev_unbind_asid(kvm, sev->handle); > + if (sev_snp_guest(kvm)) { > + if (snp_decommission_context(kvm)) { > + pr_err("Failed to free SNP guest context, leaking asid!\n"); I agree with Peter that this likely warrants a WARN. If a WARN isn't justified, e.g. this can happen without a KVM/CPU bug, then there absolutely needs to be a massive comment explaining why we have code that result in memory leaks. > + return; > + } > + } else { > + sev_unbind_asid(kvm, sev->handle); > + } > + > sev_asid_free(sev); > } > > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index b9ea99f8579e..bc5582b44356 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -67,6 +67,7 @@ struct kvm_sev_info { > u64 ap_jump_table; /* SEV-ES AP Jump Table address */ > struct kvm *enc_context_owner; /* Owner of copied encryption context */ > struct misc_cg *misc_cg; /* For misc cgroup accounting */ > + void *snp_context; /* SNP guest context page */ > }; > > struct kvm_svm { > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 989a64aa1ae5..dbd05179d8fa 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1680,6 +1680,7 @@ enum sev_cmd_id { > > /* SNP specific commands */ > KVM_SEV_SNP_INIT = 256, > + KVM_SEV_SNP_LAUNCH_START, > > KVM_SEV_NR_MAX, > }; > @@ -1781,6 +1782,14 @@ struct kvm_snp_init { > __u64 flags; > }; > > +struct kvm_sev_snp_launch_start { > + __u64 policy; > + __u64 ma_uaddr; > + __u8 ma_en; > + __u8 imi_en; > + __u8 gosvw[16]; Hmm, I'd prefer to pad this out to be 8-byte sized. > +}; > + > #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) > #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) > #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) > -- > 2.17.1 >
On 7/16/21 2:43 PM, Sean Christopherson wrote: > On Wed, Jul 07, 2021, Brijesh Singh wrote: >> @@ -1527,6 +1530,100 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) >> return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); >> } >> >> +static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp) >> +{ >> + struct sev_data_snp_gctx_create data = {}; >> + void *context; >> + int rc; >> + >> + /* Allocate memory for context page */ > Eh, I'd drop this comment. It's quite obvious that a page is being allocated > and that it's being assigned to the context. > >> + context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); >> + if (!context) >> + return NULL; >> + >> + data.gctx_paddr = __psp_pa(context); >> + rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); >> + if (rc) { >> + snp_free_firmware_page(context); >> + return NULL; >> + } >> + >> + return context; >> +} >> + >> +static int snp_bind_asid(struct kvm *kvm, int *error) >> +{ >> + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; >> + struct sev_data_snp_activate data = {}; >> + int asid = sev_get_asid(kvm); >> + int ret, retry_count = 0; >> + >> + /* Activate ASID on the given context */ >> + data.gctx_paddr = __psp_pa(sev->snp_context); >> + data.asid = asid; >> +again: >> + ret = sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error); >> + >> + /* Check if the DF_FLUSH is required, and try again */ > Please provide more info on why this may be necessary. I can see from the code > that it does a flush and retries, but I have no idea why a flush would be required > in the first place, e.g. why can't KVM guarantee that everything is in the proper > state before attempting to bind an ASID? Ah good question, we already have function to recycle the ASIDs. The recycle happen during the ASID allocation. While recycling it issues the SEV/ES DF_FLUSH command. That function need to be enhanced to use the SNP specific DF_FLUSH command when ASID's are getting reused. I wish we had one DF_FLUSH which internally takes care of both the cases. Thinking loud, maybe firmware team decided to add a new one because what if someone is not using the SEV and SEV-ES or some fw does not support legacy SEV commands. I will fix it and remove the DF_FLUSH from the launch_start. > >> + if (ret && (*error == SEV_RET_DFFLUSH_REQUIRED) && (!retry_count)) { >> + /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ >> + down_read(&sev_deactivate_lock); >> + wbinvd_on_all_cpus(); >> + ret = snp_guest_df_flush(error); >> + up_read(&sev_deactivate_lock); >> + >> + if (ret) >> + return ret; >> + >> + /* only one retry */ > Again, please explain why. Is this arbitrary? Is retrying more than once > guaranteed to be useless? > >> + retry_count = 1; >> + >> + goto again; >> + } >> + >> + return ret; >> +} > ... > >> void sev_vm_destroy(struct kvm *kvm) >> { >> struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; >> @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm) >> >> mutex_unlock(&kvm->lock); >> >> - sev_unbind_asid(kvm, sev->handle); >> + if (sev_snp_guest(kvm)) { >> + if (snp_decommission_context(kvm)) { >> + pr_err("Failed to free SNP guest context, leaking asid!\n"); > I agree with Peter that this likely warrants a WARN. If a WARN isn't justified, > e.g. this can happen without a KVM/CPU bug, then there absolutely needs to be a > massive comment explaining why we have code that result in memory leaks. Ack. > >> + return; >> + } >> + } else { >> + sev_unbind_asid(kvm, sev->handle); >> + } >> + >> sev_asid_free(sev); >> } >> >> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h >> index b9ea99f8579e..bc5582b44356 100644 >> --- a/arch/x86/kvm/svm/svm.h >> +++ b/arch/x86/kvm/svm/svm.h >> @@ -67,6 +67,7 @@ struct kvm_sev_info { >> u64 ap_jump_table; /* SEV-ES AP Jump Table address */ >> struct kvm *enc_context_owner; /* Owner of copied encryption context */ >> struct misc_cg *misc_cg; /* For misc cgroup accounting */ >> + void *snp_context; /* SNP guest context page */ >> }; >> >> struct kvm_svm { >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >> index 989a64aa1ae5..dbd05179d8fa 100644 >> --- a/include/uapi/linux/kvm.h >> +++ b/include/uapi/linux/kvm.h >> @@ -1680,6 +1680,7 @@ enum sev_cmd_id { >> >> /* SNP specific commands */ >> KVM_SEV_SNP_INIT = 256, >> + KVM_SEV_SNP_LAUNCH_START, >> >> KVM_SEV_NR_MAX, >> }; >> @@ -1781,6 +1782,14 @@ struct kvm_snp_init { >> __u64 flags; >> }; >> >> +struct kvm_sev_snp_launch_start { >> + __u64 policy; >> + __u64 ma_uaddr; >> + __u8 ma_en; >> + __u8 imi_en; >> + __u8 gosvw[16]; > Hmm, I'd prefer to pad this out to be 8-byte sized. Noted. >> +}; >> + >> #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) >> #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) >> #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) >> -- >> 2.17.1 >>
diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index 75ca60b6d40a..8620383d405a 100644 --- a/Documentation/virt/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst @@ -443,6 +443,31 @@ Returns: 0 on success, -negative on error __u64 flags; /* must be zero */ }; + +19. KVM_SNP_LAUNCH_START +------------------------ + +The KVM_SNP_LAUNCH_START command is used for creating the memory encryption +context for the SEV-SNP guest. To create the encryption context, user must +provide a guest policy, migration agent (if any) and guest OS visible +workarounds value as defined SEV-SNP specification. + +Parameters (in): struct kvm_snp_launch_start + +Returns: 0 on success, -negative on error + +:: + + struct kvm_sev_snp_launch_start { + __u64 policy; /* Guest policy to use. */ + __u64 ma_uaddr; /* userspace address of migration agent */ + __u8 ma_en; /* 1 if the migtation agent is enabled */ + __u8 imi_en; /* set IMI to 1. */ + __u8 gosvw[16]; /* guest OS visible workarounds */ + }; + +See the SEV-SNP specification for further detail on the launch input. + References ========== diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index be31221f0a47..f44a657e8912 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -20,6 +20,7 @@ #include <asm/fpu/internal.h> #include <asm/trapnr.h> +#include <asm/sev.h> #include "x86.h" #include "svm.h" @@ -75,6 +76,8 @@ static unsigned long sev_me_mask; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; +static int snp_decommission_context(struct kvm *kvm); + struct enc_region { struct list_head list; unsigned long npages; @@ -1527,6 +1530,100 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); } +static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct sev_data_snp_gctx_create data = {}; + void *context; + int rc; + + /* Allocate memory for context page */ + context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); + if (!context) + return NULL; + + data.gctx_paddr = __psp_pa(context); + rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); + if (rc) { + snp_free_firmware_page(context); + return NULL; + } + + return context; +} + +static int snp_bind_asid(struct kvm *kvm, int *error) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_snp_activate data = {}; + int asid = sev_get_asid(kvm); + int ret, retry_count = 0; + + /* Activate ASID on the given context */ + data.gctx_paddr = __psp_pa(sev->snp_context); + data.asid = asid; +again: + ret = sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error); + + /* Check if the DF_FLUSH is required, and try again */ + if (ret && (*error == SEV_RET_DFFLUSH_REQUIRED) && (!retry_count)) { + /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ + down_read(&sev_deactivate_lock); + wbinvd_on_all_cpus(); + ret = snp_guest_df_flush(error); + up_read(&sev_deactivate_lock); + + if (ret) + return ret; + + /* only one retry */ + retry_count = 1; + + goto again; + } + + return ret; +} + +static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_snp_launch_start start = {}; + struct kvm_sev_snp_launch_start params; + int rc; + + if (!sev_snp_guest(kvm)) + return -ENOTTY; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + return -EFAULT; + + /* Initialize the guest context */ + sev->snp_context = snp_context_create(kvm, argp); + if (!sev->snp_context) + return -ENOTTY; + + /* Issue the LAUNCH_START command */ + start.gctx_paddr = __psp_pa(sev->snp_context); + start.policy = params.policy; + memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw)); + rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error); + if (rc) + goto e_free_context; + + /* Bind ASID to this guest */ + sev->fd = argp->sev_fd; + rc = snp_bind_asid(kvm, &argp->error); + if (rc) + goto e_free_context; + + return 0; + +e_free_context: + snp_decommission_context(kvm); + + return rc; +} + int svm_mem_enc_op(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -1616,6 +1713,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp) case KVM_SEV_RECEIVE_FINISH: r = sev_receive_finish(kvm, &sev_cmd); break; + case KVM_SEV_SNP_LAUNCH_START: + r = snp_launch_start(kvm, &sev_cmd); + break; default: r = -EINVAL; goto out; @@ -1809,6 +1909,28 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) return ret; } +static int snp_decommission_context(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_snp_decommission data = {}; + int ret; + + /* If context is not created then do nothing */ + if (!sev->snp_context) + return 0; + + data.gctx_paddr = __sme_pa(sev->snp_context); + ret = snp_guest_decommission(&data, NULL); + if (ret) + return ret; + + /* free the context page now */ + snp_free_firmware_page(sev->snp_context); + sev->snp_context = NULL; + + return 0; +} + void sev_vm_destroy(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -1847,7 +1969,15 @@ void sev_vm_destroy(struct kvm *kvm) mutex_unlock(&kvm->lock); - sev_unbind_asid(kvm, sev->handle); + if (sev_snp_guest(kvm)) { + if (snp_decommission_context(kvm)) { + pr_err("Failed to free SNP guest context, leaking asid!\n"); + return; + } + } else { + sev_unbind_asid(kvm, sev->handle); + } + sev_asid_free(sev); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b9ea99f8579e..bc5582b44356 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -67,6 +67,7 @@ struct kvm_sev_info { u64 ap_jump_table; /* SEV-ES AP Jump Table address */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ struct misc_cg *misc_cg; /* For misc cgroup accounting */ + void *snp_context; /* SNP guest context page */ }; struct kvm_svm { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 989a64aa1ae5..dbd05179d8fa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1680,6 +1680,7 @@ enum sev_cmd_id { /* SNP specific commands */ KVM_SEV_SNP_INIT = 256, + KVM_SEV_SNP_LAUNCH_START, KVM_SEV_NR_MAX, }; @@ -1781,6 +1782,14 @@ struct kvm_snp_init { __u64 flags; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u64 ma_uaddr; + __u8 ma_en; + __u8 imi_en; + __u8 gosvw[16]; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
KVM_SEV_SNP_LAUNCH_START begins the launch process for an SEV-SNP guest. The command initializes a cryptographic digest context used to construct the measurement of the guest. If the guest is expected to be migrated, the command also binds a migration agent (MA) to the guest. For more information see the SEV-SNP specification. Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> --- .../virt/kvm/amd-memory-encryption.rst | 25 ++++ arch/x86/kvm/svm/sev.c | 132 +++++++++++++++++- arch/x86/kvm/svm/svm.h | 1 + include/uapi/linux/kvm.h | 9 ++ 4 files changed, 166 insertions(+), 1 deletion(-)