[Part2,v6,28/49] KVM: SVM: Add KVM_SEV_SNP_LAUNCH_FINISH command

Message ID 6a513cf79bf71c479dbd72165faf1d804d77b3af.1655761627.git.ashish.kalra@amd.com (mailing list archive)
State New
Series Add AMD Secure Nested Paging (SEV-SNP)

Commit Message

Kalra, Ashish June 20, 2022, 11:08 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

The KVM_SEV_SNP_LAUNCH_FINISH command finalizes the cryptographic digest and stores
it as the measurement of the guest at launch.

While finalizing the launch flow, it also issues the LAUNCH_UPDATE command
to encrypt the VMSA pages.

If it is an SNP guest, the VMSA page was added to the RMP table as
a guest-owned page and also removed from the kernel direct map,
so flush it later, after it has been transitioned back to hypervisor
state and restored in the direct map.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 .../virt/kvm/x86/amd-memory-encryption.rst    |  22 ++++
 arch/x86/kvm/svm/sev.c                        | 119 ++++++++++++++++++
 include/uapi/linux/kvm.h                      |  14 +++
 3 files changed, 155 insertions(+)

Comments

Peter Gonda July 11, 2022, 2:05 p.m. UTC | #1
On Mon, Jun 20, 2022 at 5:08 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
>
> From: Brijesh Singh <brijesh.singh@amd.com>
>
> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and stores
> it as the measurement of the guest at launch.
>
> While finalizing the launch flow, it also issues the LAUNCH_UPDATE command
> to encrypt the VMSA pages.
>
> If its an SNP guest, then VMSA was added in the RMP entry as
> a guest owned page and also removed from the kernel direct map
> so flush it later after it is transitioned back to hypervisor
> state and restored in the direct map.

Given that the guest uses the SNP NAE AP boot protocol, we were expecting
that there would be some option to add vCPUs to the VM but mark them
as being in a "pending AP boot creation protocol" state. This would allow
the LaunchDigest of a VM to stay the same even when its vCPU count
changes. Would it be possible to add a new argument to
KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA
pages for, or similarly a new argument for KVM_CREATE_VCPU?

>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>  .../virt/kvm/x86/amd-memory-encryption.rst    |  22 ++++
>  arch/x86/kvm/svm/sev.c                        | 119 ++++++++++++++++++
>  include/uapi/linux/kvm.h                      |  14 +++
>  3 files changed, 155 insertions(+)
>
> diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> index 62abd5c1f72b..750162cff87b 100644
> --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> @@ -514,6 +514,28 @@ Returns: 0 on success, -negative on error
>  See the SEV-SNP spec for further details on how to build the VMPL permission
>  mask and page type.
>
> +21. KVM_SNP_LAUNCH_FINISH
> +-------------------------
> +
> +After completion of the SNP guest launch flow, the KVM_SNP_LAUNCH_FINISH command can be
> +issued to make the guest ready for the execution.
> +
> +Parameters (in): struct kvm_sev_snp_launch_finish
> +
> +Returns: 0 on success, -negative on error
> +
> +::
> +
> +        struct kvm_sev_snp_launch_finish {
> +                __u64 id_block_uaddr;
> +                __u64 id_auth_uaddr;
> +                __u8 id_block_en;
> +                __u8 auth_key_en;
> +                __u8 host_data[32];
> +        };
> +
> +
> +See SEV-SNP specification for further details on launch finish input parameters.
>
>  References
>  ==========
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index a9461d352eda..a5b90469683f 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2095,6 +2095,106 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>         return ret;
>  }
>
> +static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> +       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +       struct sev_data_snp_launch_update data = {};
> +       int i, ret;
> +
> +       data.gctx_paddr = __psp_pa(sev->snp_context);
> +       data.page_type = SNP_PAGE_TYPE_VMSA;
> +
> +       for (i = 0; i < kvm->created_vcpus; i++) {
> +               struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));

Why are we iterating over |created_vcpus| rather than using kvm_for_each_vcpu?

> +               u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
> +
> +               /* Perform some pre-encryption checks against the VMSA */
> +               ret = sev_es_sync_vmsa(svm);
> +               if (ret)
> +                       return ret;

Do we need to take the 'vcpu->mutex' lock before modifying the
vcpu, like we do for SEV-ES in sev_launch_update_vmsa()?

> +
> +               /* Transition the VMSA page to a firmware state. */
> +               ret = rmp_make_private(pfn, -1, PG_LEVEL_4K, sev->asid, true);
> +               if (ret)
> +                       return ret;
> +
> +               /* Issue the SNP command to encrypt the VMSA */
> +               data.address = __sme_pa(svm->sev_es.vmsa);
> +               ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
> +                                     &data, &argp->error);
> +               if (ret) {
> +                       snp_page_reclaim(pfn);
> +                       return ret;
> +               }
> +
> +               svm->vcpu.arch.guest_state_protected = true;
> +       }
> +
> +       return 0;
> +}
> +
> +static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> +       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +       struct sev_data_snp_launch_finish *data;
> +       void *id_block = NULL, *id_auth = NULL;
> +       struct kvm_sev_snp_launch_finish params;
> +       int ret;
> +
> +       if (!sev_snp_guest(kvm))
> +               return -ENOTTY;
> +
> +       if (!sev->snp_context)
> +               return -EINVAL;
> +
> +       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
> +               return -EFAULT;
> +
> +       /* Measure all vCPUs using LAUNCH_UPDATE before we finalize the launch flow. */
> +       ret = snp_launch_update_vmsa(kvm, argp);
> +       if (ret)
> +               return ret;
> +
> +       data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
> +       if (!data)
> +               return -ENOMEM;
> +
> +       if (params.id_block_en) {
> +               id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE);
> +               if (IS_ERR(id_block)) {
> +                       ret = PTR_ERR(id_block);
> +                       goto e_free;
> +               }
> +
> +               data->id_block_en = 1;
> +               data->id_block_paddr = __sme_pa(id_block);
> +       }
> +
> +       if (params.auth_key_en) {
> +               id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE);
> +               if (IS_ERR(id_auth)) {
> +                       ret = PTR_ERR(id_auth);
> +                       goto e_free_id_block;
> +               }
> +
> +               data->auth_key_en = 1;
> +               data->id_auth_paddr = __sme_pa(id_auth);
> +       }
> +
> +       data->gctx_paddr = __psp_pa(sev->snp_context);
> +       ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
> +
> +       kfree(id_auth);
> +
> +e_free_id_block:
> +       kfree(id_block);
> +
> +e_free:
> +       kfree(data);
> +
> +       return ret;
> +}
> +
>  int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>  {
>         struct kvm_sev_cmd sev_cmd;
> @@ -2191,6 +2291,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>         case KVM_SEV_SNP_LAUNCH_UPDATE:
>                 r = snp_launch_update(kvm, &sev_cmd);
>                 break;
> +       case KVM_SEV_SNP_LAUNCH_FINISH:
> +               r = snp_launch_finish(kvm, &sev_cmd);
> +               break;
>         default:
>                 r = -EINVAL;
>                 goto out;
> @@ -2696,11 +2799,27 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>
>         svm = to_svm(vcpu);
>
> +       /*
> +        * If its an SNP guest, then VMSA was added in the RMP entry as
> +        * a guest owned page. Transition the page to hypervisor state
> +        * before releasing it back to the system.
> +        * Also the page is removed from the kernel direct map, so flush it
> +        * later after it is transitioned back to hypervisor state and
> +        * restored in the direct map.
> +        */
> +       if (sev_snp_guest(vcpu->kvm)) {
> +               u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
> +
> +               if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false))
> +                       goto skip_vmsa_free;

Why not call host_rmp_make_shared with leak==true? This old VMSA page
is now unusable IIUC.



> +       }
> +
>         if (vcpu->arch.guest_state_protected)
>                 sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
>
>         __free_page(virt_to_page(svm->sev_es.vmsa));
>
> +skip_vmsa_free:
>         if (svm->sev_es.ghcb_sa_free)
>                 kvfree(svm->sev_es.ghcb_sa);
>  }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9b36b07414ea..5a4662716b6a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1814,6 +1814,7 @@ enum sev_cmd_id {
>         KVM_SEV_SNP_INIT,
>         KVM_SEV_SNP_LAUNCH_START,
>         KVM_SEV_SNP_LAUNCH_UPDATE,
> +       KVM_SEV_SNP_LAUNCH_FINISH,
>
>         KVM_SEV_NR_MAX,
>  };
> @@ -1948,6 +1949,19 @@ struct kvm_sev_snp_launch_update {
>         __u8 vmpl1_perms;
>  };
>
> +#define KVM_SEV_SNP_ID_BLOCK_SIZE      96
> +#define KVM_SEV_SNP_ID_AUTH_SIZE       4096
> +#define KVM_SEV_SNP_FINISH_DATA_SIZE   32
> +
> +struct kvm_sev_snp_launch_finish {
> +       __u64 id_block_uaddr;
> +       __u64 id_auth_uaddr;
> +       __u8 id_block_en;
> +       __u8 auth_key_en;
> +       __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
> +       __u8 pad[6];
> +};
> +
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
>  #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
>  #define KVM_DEV_ASSIGN_MASK_INTX       (1 << 2)
> --
> 2.25.1
>
Kalra, Ashish July 11, 2022, 10:41 p.m. UTC | #2

Hello Peter,

>> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and 
>> stores it as the measurement of the guest at launch.
>>
>> While finalizing the launch flow, it also issues the LAUNCH_UPDATE 
>> command to encrypt the VMSA pages.

>Given the guest uses the SNP NAE AP boot protocol we were expecting that there would be some option to add vCPUs to the VM but mark them as "pending AP boot creation protocol" state. This would allow the LaunchDigest of a VM doesn't change >just because its vCPU count changes. Would it be possible to add a new add an argument to KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA pages for or similarly a new argument for KVM_CREATE_VCPU?

But don't we want/need to measure all vCPUs using LAUNCH_UPDATE_VMSA before we issue the SNP_LAUNCH_FINISH command?

If we are going to add vCPUs and mark them as being in a "pending AP boot creation" state, then how are we going to do LAUNCH_UPDATE_VMSAs for them after SNP_LAUNCH_FINISH?

>> +static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
>> +{
>> +       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>> +       struct sev_data_snp_launch_update data = {};
>> +       int i, ret;
>> +
>> +       data.gctx_paddr = __psp_pa(sev->snp_context);
>> +       data.page_type = SNP_PAGE_TYPE_VMSA;
>> +
>> +       for (i = 0; i < kvm->created_vcpus; i++) {
>> +               struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));

> Why are we iterating over |created_vcpus| rather than using kvm_for_each_vcpu?

Yes, we should be using kvm_for_each_vcpu(); that will also help avoid touching implementation-specific
details and hide complexities such as xa_load(), locking requirements, etc.

Additionally, kvm_for_each_vcpu() works on online_vcpus, but I think that is what we should
be considering at LAUNCH_UPDATE_VMSA time, vis-a-vis created_vcpus.
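
For illustration, a minimal sketch of how that loop could look with
kvm_for_each_vcpu(), based on the v6 code in this patch (not the final
implementation):

        struct kvm_vcpu *vcpu;
        unsigned long i;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct vcpu_svm *svm = to_svm(vcpu);
                u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;

                /* per-vCPU sync/RMP/LAUNCH_UPDATE work as in the patch ... */
        }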

>> +               u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
>> +
>> +               /* Perform some pre-encryption checks against the VMSA */
>> +               ret = sev_es_sync_vmsa(svm);
>> +               if (ret)
>> +                       return ret;

>Do we need to take the 'vcpu->mutex' lock before modifying the vcpu,like we do for SEV-ES in sev_launch_update_vmsa()?

This is using the per-vCPU vcpu_svm structure, but we may need to guard against concurrent KVM vCPU ioctls, so yes, it is
safer to take the 'vcpu->mutex' lock here.
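
As a rough sketch, modeled on how the SEV-ES path in sev_launch_update_vmsa()
takes the lock upstream (the helper name below is made up purely for
illustration):

        kvm_for_each_vcpu(i, vcpu, kvm) {
                ret = mutex_lock_killable(&vcpu->mutex);
                if (ret)
                        return ret;

                /* sev_es_sync_vmsa() + rmp_make_private() + LAUNCH_UPDATE */
                ret = snp_launch_update_one_vmsa(kvm, vcpu, argp); /* hypothetical helper */

                mutex_unlock(&vcpu->mutex);
                if (ret)
                        return ret;
        }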

>> +       /*
>> +        * If its an SNP guest, then VMSA was added in the RMP entry as
>> +        * a guest owned page. Transition the page to hypervisor state
>> +        * before releasing it back to the system.
>> +        * Also the page is removed from the kernel direct map, so flush it
>> +        * later after it is transitioned back to hypervisor state and
>> +        * restored in the direct map.
>> +        */
>> +       if (sev_snp_guest(vcpu->kvm)) {
>> +               u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
>> +
>> +               if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false))
>> +                       goto skip_vmsa_free;

>Why not call host_rmp_make_shared with leak==true? This old VMSA page is now unusable IIUC.

Yes, the old VMSA page is now unavailable and lost, so it makes sense to call host_rmp_make_shared() with leak==true.
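
A minimal sketch of that change in sev_free_vcpu(), keeping the rest of the
hunk as posted in v6:

        if (sev_snp_guest(vcpu->kvm)) {
                u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;

                /* Leak the page if it cannot be transitioned back; it is unusable. */
                if (host_rmp_make_shared(pfn, PG_LEVEL_4K, true))
                        goto skip_vmsa_free;
        }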

Thanks,
Ashish
Peter Gonda July 12, 2022, 2:45 p.m. UTC | #3
On Mon, Jul 11, 2022 at 4:41 PM Kalra, Ashish <Ashish.Kalra@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
> Hello Peter,
>
> >> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and
> >> stores it as the measurement of the guest at launch.
> >>
> >> While finalizing the launch flow, it also issues the LAUNCH_UPDATE
> >> command to encrypt the VMSA pages.
>
> >Given the guest uses the SNP NAE AP boot protocol we were expecting that there would be some option to add vCPUs to the VM but mark them as "pending AP boot creation protocol" state. This would allow the LaunchDigest of a VM doesn't change >just because its vCPU count changes. Would it be possible to add a new add an argument to KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA pages for or similarly a new argument for KVM_CREATE_VCPU?
>
> But don't we want/need to measure all vCPUs using LAUNCH_UPDATE_VMSA before we issue SNP_LAUNCH_FINISH command ?
>
> If we are going to add vCPUs and mark them as "pending AP boot creation" state then how are we going to do LAUNCH_UPDATE_VMSAs for them after SNP_LAUNCH_FINISH ?

If I understand correctly we don't need or even want the APs to be
LAUNCH_UPDATE_VMSA'd. LAUNCH_UPDATEing all the VMSAs causes VMs with
different numbers of vCPUs to have different launch digests. It's my
understanding the SNP AP Creation protocol was meant to solve this so that
VMs with different vCPU counts have the same launch digest.

Looking at patch "[Part2,v6,44/49] KVM: SVM: Support SEV-SNP AP
Creation NAE event" and section "4.1.9 SNP AP Creation" of the GHCB
spec. There is no need to LAUNCH_UPDATE the APs' VMSAs or mark
the vCPUs runnable. Instead we can do that only for the BSP. Then in
the guest UEFI the BSP can: create new VMSAs from guest pages,
RMPADJUST them into the RMP state VMSA, then use the SNP AP Creation
NAE to get the hypervisor to mark them runnable. I believe this is all
setup in the UEFI patch:
https://www.mail-archive.com/devel@edk2.groups.io/msg38460.html.
Kalra, Ashish July 12, 2022, 3:22 p.m. UTC | #4

Hello Peter,

>> >Given the guest uses the SNP NAE AP boot protocol we were expecting that there would be some option to add vCPUs to the VM but mark them as "pending AP boot creation protocol" state. This would allow the LaunchDigest of a VM doesn't change >just because its vCPU count changes. Would it be possible to add a new add an argument to KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA pages for or similarly a new argument for KVM_CREATE_VCPU?
>>
>> But don't we want/need to measure all vCPUs using LAUNCH_UPDATE_VMSA before we issue SNP_LAUNCH_FINISH command ?
>>
>> If we are going to add vCPUs and mark them as "pending AP boot creation" state then how are we going to do LAUNCH_UPDATE_VMSAs for them after SNP_LAUNCH_FINISH ?

>If I understand correctly we don't need or even want the APs to be LAUNCH_UPDATE_VMSA'd. LAUNCH_UPDATEing all the VMSAs causes VMs with different numbers of vCPUs to have different launch digests. Its my understanding the SNP AP >Creation protocol was to solve this so that VMs with different vcpu counts have the same launch digest.

>Looking at patch "[Part2,v6,44/49] KVM: SVM: Support SEV-SNP AP Creation NAE event" and section "4.1.9 SNP AP Creation" of the GHCB spec. There is no need to mark the LAUNCH_UPDATE the AP's VMSA or mark the vCPUs runnable. Instead we >can do that only for the BSP. Then in the guest UEFI the BSP can: create new VMSAs from guest pages, RMPADJUST them into the RMP state VMSA, then use the SNP AP Creation NAE to get the hypervisor to mark them runnable. I believe this is all >setup in the UEFI patch:
>https://www.mail-archive.com/devel@edk2.groups.io/msg38460.html.

Yes, I discussed the same with Tom, and this will be supported going forward: only the BSP will need to go through LAUNCH_UPDATE_VMSA, and at runtime the guest can dynamically create more APs using the SNP AP Creation NAE event.

Now, coming back to the original question: why do we need a separate vCPU count argument for SNP_LAUNCH_FINISH? Won't the statically created vCPUs in kvm->created_vcpus/online_vcpus be sufficient for that? Any dynamically created
vCPUs won't be part of the initial measurement or LaunchDigest of the VM, right?

Thanks,
Ashish
Peter Gonda July 12, 2022, 4:04 p.m. UTC | #5
On Tue, Jul 12, 2022 at 9:22 AM Kalra, Ashish <Ashish.Kalra@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
> Hello Peter,
>
> >> >Given the guest uses the SNP NAE AP boot protocol we were expecting that there would be some option to add vCPUs to the VM but mark them as "pending AP boot creation protocol" state. This would allow the LaunchDigest of a VM doesn't change >just because its vCPU count changes. Would it be possible to add a new add an argument to KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA pages for or similarly a new argument for KVM_CREATE_VCPU?
> >>
> >> But don't we want/need to measure all vCPUs using LAUNCH_UPDATE_VMSA before we issue SNP_LAUNCH_FINISH command ?
> >>
> >> If we are going to add vCPUs and mark them as "pending AP boot creation" state then how are we going to do LAUNCH_UPDATE_VMSAs for them after SNP_LAUNCH_FINISH ?
>
> >If I understand correctly we don't need or even want the APs to be LAUNCH_UPDATE_VMSA'd. LAUNCH_UPDATEing all the VMSAs causes VMs with different numbers of vCPUs to have different launch digests. Its my understanding the SNP AP >Creation protocol was to solve this so that VMs with different vcpu counts have the same launch digest.
>
> >Looking at patch "[Part2,v6,44/49] KVM: SVM: Support SEV-SNP AP Creation NAE event" and section "4.1.9 SNP AP Creation" of the GHCB spec. There is no need to mark the LAUNCH_UPDATE the AP's VMSA or mark the vCPUs runnable. Instead we >can do that only for the BSP. Then in the guest UEFI the BSP can: create new VMSAs from guest pages, RMPADJUST them into the RMP state VMSA, then use the SNP AP Creation NAE to get the hypervisor to mark them runnable. I believe this is all >setup in the UEFI patch:
> >https://www.mail-archive.com/devel@edk2.groups.io/msg38460.html.
>
> Yes, I discussed the same with Tom, and this will be supported going forward, only the BSP will need to go through the LAUNCH_UPDATE_VMSA and at runtime the guest can dynamically create more APs using the SNP AP Creation NAE event.
>
> Now, coming back to the original question, why do we need a separate vCPU count argument for SNP_LAUNCH_FINISH, won't the statically created vCPUs in kvm->created_vcpus/online_vcpus be sufficient for that, any dynamically created
> vCPU's won't be part of the initial measurement or LaunchDigest of the VM, right ?

Are you suggesting that QEMU will KVM_CREATE_VCPU the BSP, then
LAUNCH_FINISH, then KVM_CREATE_VCPU all the APs so that their VMSAs are
not LAUNCH_UPDATEd? If so, it seems annoying to have to create vCPUs
at different times to get their VMSAs into different states. That's
why I was suggesting some other mechanism so we can continue to
KVM_CREATE_VCPU all the vCPUs at the same time.

>
> Thanks,
> Ashish
Tom Lendacky July 12, 2022, 5:40 p.m. UTC | #6
On 7/12/22 09:45, Peter Gonda wrote:
> On Mon, Jul 11, 2022 at 4:41 PM Kalra, Ashish <Ashish.Kalra@amd.com> wrote:
>>
>> [AMD Official Use Only - General]
>>
>> Hello Peter,
>>
>>>> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and
>>>> stores it as the measurement of the guest at launch.
>>>>
>>>> While finalizing the launch flow, it also issues the LAUNCH_UPDATE
>>>> command to encrypt the VMSA pages.
>>
>>> Given the guest uses the SNP NAE AP boot protocol we were expecting that there would be some option to add vCPUs to the VM but mark them as "pending AP boot creation protocol" state. This would allow the LaunchDigest of a VM doesn't change >just because its vCPU count changes. Would it be possible to add a new add an argument to KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA pages for or similarly a new argument for KVM_CREATE_VCPU?
>>
>> But don't we want/need to measure all vCPUs using LAUNCH_UPDATE_VMSA before we issue SNP_LAUNCH_FINISH command ?
>>
>> If we are going to add vCPUs and mark them as "pending AP boot creation" state then how are we going to do LAUNCH_UPDATE_VMSAs for them after SNP_LAUNCH_FINISH ?
> 
> If I understand correctly we don't need or even want the APs to be
> LAUNCH_UPDATE_VMSA'd. LAUNCH_UPDATEing all the VMSAs causes VMs with
> different numbers of vCPUs to have different launch digests. Its my
> understanding the SNP AP Creation protocol was to solve this so that
> VMs with different vcpu counts have the same launch digest.
> 
> Looking at patch "[Part2,v6,44/49] KVM: SVM: Support SEV-SNP AP
> Creation NAE event" and section "4.1.9 SNP AP Creation" of the GHCB
> spec. There is no need to mark the LAUNCH_UPDATE the AP's VMSA or mark
> the vCPUs runnable. Instead we can do that only for the BSP. Then in
> the guest UEFI the BSP can: create new VMSAs from guest pages,
> RMPADJUST them into the RMP state VMSA, then use the SNP AP Creation
> NAE to get the hypervisor to mark them runnable. I believe this is all
> setup in the UEFI patch:
> https://www.mail-archive.com/devel@edk2.groups.io/msg38460.html.

Not quite...  there isn't a way to (easily) retrieve the APIC IDs for all 
of the vCPUs, which are required in order to use the AP Create event.

For this version of SNP, all of the vCPUs are measured and started by OVMF 
in the same way as SEV-ES. However, once the vCPUs have run, we now have 
the APIC ID associated with each vCPU and the AP Create event can be used 
going forward.

The SVSM support will introduce a new NAE event to the GHCB spec to
retrieve all of the APIC IDs from the hypervisor. With that, you would
only be required to perform a LAUNCH_UPDATE_VMSA against the BSP.

Thanks,
Tom
Peter Gonda July 13, 2022, 2:59 p.m. UTC | #7
On Tue, Jul 12, 2022 at 11:40 AM Tom Lendacky <thomas.lendacky@amd.com> wrote:
>
> On 7/12/22 09:45, Peter Gonda wrote:
> > On Mon, Jul 11, 2022 at 4:41 PM Kalra, Ashish <Ashish.Kalra@amd.com> wrote:
> >>
> >> [AMD Official Use Only - General]
> >>
> >> Hello Peter,
> >>
> >>>> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and
> >>>> stores it as the measurement of the guest at launch.
> >>>>
> >>>> While finalizing the launch flow, it also issues the LAUNCH_UPDATE
> >>>> command to encrypt the VMSA pages.
> >>
> >>> Given the guest uses the SNP NAE AP boot protocol we were expecting that there would be some option to add vCPUs to the VM but mark them as "pending AP boot creation protocol" state. This would allow the LaunchDigest of a VM doesn't change >just because its vCPU count changes. Would it be possible to add a new add an argument to KVM_SNP_LAUNCH_FINISH to tell it which vCPUs to LAUNCH_UPDATE VMSA pages for or similarly a new argument for KVM_CREATE_VCPU?
> >>
> >> But don't we want/need to measure all vCPUs using LAUNCH_UPDATE_VMSA before we issue SNP_LAUNCH_FINISH command ?
> >>
> >> If we are going to add vCPUs and mark them as "pending AP boot creation" state then how are we going to do LAUNCH_UPDATE_VMSAs for them after SNP_LAUNCH_FINISH ?
> >
> > If I understand correctly we don't need or even want the APs to be
> > LAUNCH_UPDATE_VMSA'd. LAUNCH_UPDATEing all the VMSAs causes VMs with
> > different numbers of vCPUs to have different launch digests. Its my
> > understanding the SNP AP Creation protocol was to solve this so that
> > VMs with different vcpu counts have the same launch digest.
> >
> > Looking at patch "[Part2,v6,44/49] KVM: SVM: Support SEV-SNP AP
> > Creation NAE event" and section "4.1.9 SNP AP Creation" of the GHCB
> > spec. There is no need to mark the LAUNCH_UPDATE the AP's VMSA or mark
> > the vCPUs runnable. Instead we can do that only for the BSP. Then in
> > the guest UEFI the BSP can: create new VMSAs from guest pages,
> > RMPADJUST them into the RMP state VMSA, then use the SNP AP Creation
> > NAE to get the hypervisor to mark them runnable. I believe this is all
> > setup in the UEFI patch:
> > https://www.mail-archive.com/devel@edk2.groups.io/msg38460.html.
>
> Not quite...  there isn't a way to (easily) retrieve the APIC IDs for all
> of the vCPUs, which are required in order to use the AP Create event.
>
> For this version of SNP, all of the vCPUs are measured and started by OVMF
> in the same way as SEV-ES. However, once the vCPUs have run, we now have
> the APIC ID associated with each vCPU and the AP Create event can be used
> going forward.
>
> The SVSM support will introduce a new NAE event to the GHCB spec to
> retrieve all of the APIC IDs from the hypervisor. With that, then you
> would be able be required to perform a LAUNCH_UPDATE_VMSA against the BSP.

Thank you, Tom. I missed that we needed to run the APs to set up their
APIC IDs for OVMF. Is there any reason we need to wait for the SVSM to
do what you describe? Couldn't the OVMF use an NAE to get all the APIC
IDs?

>
> Thanks,
> Tom
>
Jarkko Sakkinen Aug. 2, 2022, 1:28 p.m. UTC | #8
On Mon, Jun 20, 2022 at 11:08:38PM +0000, Ashish Kalra wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and stores
> it as the measurement of the guest at launch.
> 
> While finalizing the launch flow, it also issues the LAUNCH_UPDATE command
> to encrypt the VMSA pages.

Nit: for completeness' sake, it would be nice to state explicitly whether
LAUNCH_UPDATE is usable after LAUNCH_FINISH in this paragraph.

> 
> If its an SNP guest, then VMSA was added in the RMP entry as
> a guest owned page and also removed from the kernel direct map
> so flush it later after it is transitioned back to hypervisor
> state and restored in the direct map.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>  .../virt/kvm/x86/amd-memory-encryption.rst    |  22 ++++
>  arch/x86/kvm/svm/sev.c                        | 119 ++++++++++++++++++
>  include/uapi/linux/kvm.h                      |  14 +++
>  3 files changed, 155 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> index 62abd5c1f72b..750162cff87b 100644
> --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> @@ -514,6 +514,28 @@ Returns: 0 on success, -negative on error
>  See the SEV-SNP spec for further details on how to build the VMPL permission
>  mask and page type.
>  
> +21. KVM_SNP_LAUNCH_FINISH
> +-------------------------
> +
> +After completion of the SNP guest launch flow, the KVM_SNP_LAUNCH_FINISH command can be
> +issued to make the guest ready for the execution.

Some remark about LAUNCH_UPDATE post-LAUNCH_FINISH would be nice.

> +
> +Parameters (in): struct kvm_sev_snp_launch_finish
> +
> +Returns: 0 on success, -negative on error
> +
> +::
> +
> +        struct kvm_sev_snp_launch_finish {
> +                __u64 id_block_uaddr;
> +                __u64 id_auth_uaddr;
> +                __u8 id_block_en;
> +                __u8 auth_key_en;
> +                __u8 host_data[32];
> +        };
> +
> +
> +See SEV-SNP specification for further details on launch finish input parameters.
>  
>  References
>  ==========
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index a9461d352eda..a5b90469683f 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2095,6 +2095,106 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	return ret;
>  }
>  
> +static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct sev_data_snp_launch_update data = {};
> +	int i, ret;
> +
> +	data.gctx_paddr = __psp_pa(sev->snp_context);
> +	data.page_type = SNP_PAGE_TYPE_VMSA;
> +
> +	for (i = 0; i < kvm->created_vcpus; i++) {
> +		struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));
> +		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
> +
> +		/* Perform some pre-encryption checks against the VMSA */
> +		ret = sev_es_sync_vmsa(svm);
> +		if (ret)
> +			return ret;
> +
> +		/* Transition the VMSA page to a firmware state. */
> +		ret = rmp_make_private(pfn, -1, PG_LEVEL_4K, sev->asid, true);
> +		if (ret)
> +			return ret;
> +
> +		/* Issue the SNP command to encrypt the VMSA */
> +		data.address = __sme_pa(svm->sev_es.vmsa);
> +		ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
> +				      &data, &argp->error);
> +		if (ret) {
> +			snp_page_reclaim(pfn);
> +			return ret;
> +		}
> +
> +		svm->vcpu.arch.guest_state_protected = true;
> +	}
> +
> +	return 0;
> +}
> +
> +static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct sev_data_snp_launch_finish *data;
> +	void *id_block = NULL, *id_auth = NULL;
> +	struct kvm_sev_snp_launch_finish params;

Nit: "params" should be the 2nd declaration (reverse
christmas tree order).
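
For reference, one possible ordering under that convention, keeping the
initializer for 'sev' first (just a sketch of the nit, not a requirement):

	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_snp_launch_finish params;
	struct sev_data_snp_launch_finish *data;
	void *id_block = NULL, *id_auth = NULL;
	int ret;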

> +	int ret;
> +
> +	if (!sev_snp_guest(kvm))
> +		return -ENOTTY;
> +
> +	if (!sev->snp_context)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
> +		return -EFAULT;
> +
> +	/* Measure all vCPUs using LAUNCH_UPDATE before we finalize the launch flow. */
> +	ret = snp_launch_update_vmsa(kvm, argp);
> +	if (ret)
> +		return ret;
> +
> +	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	if (params.id_block_en) {
> +		id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE);
> +		if (IS_ERR(id_block)) {
> +			ret = PTR_ERR(id_block);
> +			goto e_free;
> +		}
> +
> +		data->id_block_en = 1;
> +		data->id_block_paddr = __sme_pa(id_block);
> +	}
> +
> +	if (params.auth_key_en) {
> +		id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE);
> +		if (IS_ERR(id_auth)) {
> +			ret = PTR_ERR(id_auth);
> +			goto e_free_id_block;
> +		}
> +
> +		data->auth_key_en = 1;
> +		data->id_auth_paddr = __sme_pa(id_auth);
> +	}
> +
> +	data->gctx_paddr = __psp_pa(sev->snp_context);
> +	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
> +
> +	kfree(id_auth);
> +
> +e_free_id_block:
> +	kfree(id_block);
> +
> +e_free:
> +	kfree(data);
> +
> +	return ret;
> +}
> +
>  int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>  {
>  	struct kvm_sev_cmd sev_cmd;
> @@ -2191,6 +2291,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>  	case KVM_SEV_SNP_LAUNCH_UPDATE:
>  		r = snp_launch_update(kvm, &sev_cmd);
>  		break;
> +	case KVM_SEV_SNP_LAUNCH_FINISH:
> +		r = snp_launch_finish(kvm, &sev_cmd);
> +		break;
>  	default:
>  		r = -EINVAL;
>  		goto out;
> @@ -2696,11 +2799,27 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>  
>  	svm = to_svm(vcpu);
>  
> +	/*
> +	 * If its an SNP guest, then VMSA was added in the RMP entry as
> +	 * a guest owned page. Transition the page to hypervisor state
> +	 * before releasing it back to the system.
> +	 * Also the page is removed from the kernel direct map, so flush it
> +	 * later after it is transitioned back to hypervisor state and
> +	 * restored in the direct map.
> +	 */
> +	if (sev_snp_guest(vcpu->kvm)) {
> +		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
> +
> +		if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false))
> +			goto skip_vmsa_free;
> +	}
> +
>  	if (vcpu->arch.guest_state_protected)
>  		sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
>  
>  	__free_page(virt_to_page(svm->sev_es.vmsa));
>  
> +skip_vmsa_free:
>  	if (svm->sev_es.ghcb_sa_free)
>  		kvfree(svm->sev_es.ghcb_sa);
>  }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9b36b07414ea..5a4662716b6a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1814,6 +1814,7 @@ enum sev_cmd_id {
>  	KVM_SEV_SNP_INIT,
>  	KVM_SEV_SNP_LAUNCH_START,
>  	KVM_SEV_SNP_LAUNCH_UPDATE,
> +	KVM_SEV_SNP_LAUNCH_FINISH,
>  
>  	KVM_SEV_NR_MAX,
>  };
> @@ -1948,6 +1949,19 @@ struct kvm_sev_snp_launch_update {
>  	__u8 vmpl1_perms;
>  };
>  
> +#define KVM_SEV_SNP_ID_BLOCK_SIZE	96
> +#define KVM_SEV_SNP_ID_AUTH_SIZE	4096
> +#define KVM_SEV_SNP_FINISH_DATA_SIZE	32
> +
> +struct kvm_sev_snp_launch_finish {
> +	__u64 id_block_uaddr;
> +	__u64 id_auth_uaddr;
> +	__u8 id_block_en;
> +	__u8 auth_key_en;
> +	__u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
> +	__u8 pad[6];
> +};
> +
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
>  #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
> -- 
> 2.25.1
> 

BR, Jarkko
Harald Hoyer Sept. 9, 2022, 8:04 a.m. UTC | #9
Replying inline to the patch (and not with an in-reply-to patch, as nitted by Sean Christopherson).

Am 21.06.22 um 01:08 schrieb Ashish Kalra:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> The KVM_SEV_SNP_LAUNCH_FINISH finalize the cryptographic digest and stores
> it as the measurement of the guest at launch.
> 
> While finalizing the launch flow, it also issues the LAUNCH_UPDATE command
> to encrypt the VMSA pages.
> 
> If its an SNP guest, then VMSA was added in the RMP entry as
> a guest owned page and also removed from the kernel direct map
> so flush it later after it is transitioned back to hypervisor
> state and restored in the direct map.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>   .../virt/kvm/x86/amd-memory-encryption.rst    |  22 ++++
>   arch/x86/kvm/svm/sev.c                        | 119 ++++++++++++++++++
>   include/uapi/linux/kvm.h                      |  14 +++
>   3 files changed, 155 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> index 62abd5c1f72b..750162cff87b 100644
> --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
> @@ -514,6 +514,28 @@ Returns: 0 on success, -negative on error
>   See the SEV-SNP spec for further details on how to build the VMPL permission
>   mask and page type.
>   
> +21. KVM_SNP_LAUNCH_FINISH
> +-------------------------
> +
> +After completion of the SNP guest launch flow, the KVM_SNP_LAUNCH_FINISH command can be
> +issued to make the guest ready for the execution.
> +
> +Parameters (in): struct kvm_sev_snp_launch_finish
> +
> +Returns: 0 on success, -negative on error
> +
> +::
> +
> +        struct kvm_sev_snp_launch_finish {
> +                __u64 id_block_uaddr;
> +                __u64 id_auth_uaddr;
> +                __u8 id_block_en;
> +                __u8 auth_key_en;
> +                __u8 host_data[32];
> +        };
> +
> +
> +See SEV-SNP specification for further details on launch finish input parameters.
>   
>   References
>   ==========
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index a9461d352eda..a5b90469683f 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2095,6 +2095,106 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>   	return ret;
>   }
>   
> +static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct sev_data_snp_launch_update data = {};
> +	int i, ret;
> +
> +	data.gctx_paddr = __psp_pa(sev->snp_context);
> +	data.page_type = SNP_PAGE_TYPE_VMSA;
> +
> +	for (i = 0; i < kvm->created_vcpus; i++) {
> +		struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));
> +		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
> +
> +		/* Perform some pre-encryption checks against the VMSA */
> +		ret = sev_es_sync_vmsa(svm);
> +		if (ret)
> +			return ret;
> +
> +		/* Transition the VMSA page to a firmware state. */
> +		ret = rmp_make_private(pfn, -1, PG_LEVEL_4K, sev->asid, true);
> +		if (ret)
> +			return ret;
> +
> +		/* Issue the SNP command to encrypt the VMSA */
> +		data.address = __sme_pa(svm->sev_es.vmsa);
> +		ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
> +				      &data, &argp->error);
> +		if (ret) {
> +			snp_page_reclaim(pfn);
> +			return ret;
> +		}
> +
> +		svm->vcpu.arch.guest_state_protected = true;
> +	}
> +
> +	return 0;
> +}
> +
> +static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct sev_data_snp_launch_finish *data;
> +	void *id_block = NULL, *id_auth = NULL;
> +	struct kvm_sev_snp_launch_finish params;
> +	int ret;
> +
> +	if (!sev_snp_guest(kvm))
> +		return -ENOTTY;
> +
> +	if (!sev->snp_context)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
> +		return -EFAULT;
> +
> +	/* Measure all vCPUs using LAUNCH_UPDATE before we finalize the launch flow. */
> +	ret = snp_launch_update_vmsa(kvm, argp);

This poses a real problem for those who want to precalculate the digest beforehand and sign their TEE without loading the TEE:
1. We don't know the contents of the VMSA, nor the hash of it.
2. Who guarantees that future kernels have the same VMSA contents?

I would propose at least one additional ioctl parameter specifying the final VMSA for the SNP_PAGE_TYPE_VMSA snp_launch_update_vmsa (see the illustrative sketch below the list).
This parameter could specify to use:
- the current VMSA
- or a VMSA resembling the CPU state on reset, whose contents are guaranteed to never change and have a defined digest
- or a user-provided VMSA
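
Purely as a hypothetical illustration of that proposal (the field and enum
names below are made up for discussion and are not part of the posted
series), the uAPI could grow something like:

	/* Hypothetical selector values, for discussion only. */
	enum {
		KVM_SEV_SNP_VMSA_CURRENT,	/* measure the VMSA as KVM has set it up */
		KVM_SEV_SNP_VMSA_RESET,		/* measure a fixed, documented CPU reset state */
		KVM_SEV_SNP_VMSA_USER,		/* measure a VMSA image supplied by userspace */
	};

	struct kvm_sev_snp_launch_finish {
		__u64 id_block_uaddr;
		__u64 id_auth_uaddr;
		__u64 vmsa_uaddr;	/* hypothetical: only used with KVM_SEV_SNP_VMSA_USER */
		__u8 vmsa_type;		/* hypothetical: one of the selectors above */
		__u8 id_block_en;
		__u8 auth_key_en;
		__u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
		__u8 pad[5];
	};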

> +	if (ret)
> +		return ret;
> +
> +	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	if (params.id_block_en) {
> +		id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE);
> +		if (IS_ERR(id_block)) {
> +			ret = PTR_ERR(id_block);
> +			goto e_free;
> +		}
> +
> +		data->id_block_en = 1;
> +		data->id_block_paddr = __sme_pa(id_block);
> +	}
> +
> +	if (params.auth_key_en) {

The `params.auth_key_en` indicator does _not_ specify whether an ID_AUTH struct should be sent or not,
but whether the ID_AUTH struct contains an author key or not. The firmware always expects an ID_AUTH block.

So, please move the upper `if` to enclose only `data->auth_key_en = 1;` (see the sketch below), or use my patch sent in reply to this mail yesterday.
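
A rough sketch of that restructuring, for illustration only (the patch
mentioned above has the complete fix): always copy in the ID_AUTH blob and
let params.auth_key_en only control the AUTH_KEY_EN flag:

	id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE);
	if (IS_ERR(id_auth)) {
		ret = PTR_ERR(id_auth);
		goto e_free_id_block;
	}

	/* The firmware consumes an ID_AUTH structure regardless of auth_key_en. */
	data->id_auth_paddr = __sme_pa(id_auth);
	if (params.auth_key_en)
		data->auth_key_en = 1;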

> +		id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE);
> +		if (IS_ERR(id_auth)) {
> +			ret = PTR_ERR(id_auth);
> +			goto e_free_id_block;
> +		}
> +
> +		data->auth_key_en = 1;
> +		data->id_auth_paddr = __sme_pa(id_auth);
> +	}
> +
> +	data->gctx_paddr = __psp_pa(sev->snp_context);
> +	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
> +
> +	kfree(id_auth);
> +
> +e_free_id_block:
> +	kfree(id_block);
> +
> +e_free:
> +	kfree(data);
> +
> +	return ret;
> +}
> +
>   int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>   {
>   	struct kvm_sev_cmd sev_cmd;
> @@ -2191,6 +2291,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>   	case KVM_SEV_SNP_LAUNCH_UPDATE:
>   		r = snp_launch_update(kvm, &sev_cmd);
>   		break;
> +	case KVM_SEV_SNP_LAUNCH_FINISH:
> +		r = snp_launch_finish(kvm, &sev_cmd);
> +		break;
>   	default:
>   		r = -EINVAL;
>   		goto out;
> @@ -2696,11 +2799,27 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>   
>   	svm = to_svm(vcpu);
>   
> +	/*
> +	 * If its an SNP guest, then VMSA was added in the RMP entry as
> +	 * a guest owned page. Transition the page to hypervisor state
> +	 * before releasing it back to the system.
> +	 * Also the page is removed from the kernel direct map, so flush it
> +	 * later after it is transitioned back to hypervisor state and
> +	 * restored in the direct map.
> +	 */
> +	if (sev_snp_guest(vcpu->kvm)) {
> +		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
> +
> +		if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false))
> +			goto skip_vmsa_free;
> +	}
> +
>   	if (vcpu->arch.guest_state_protected)
>   		sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
>   
>   	__free_page(virt_to_page(svm->sev_es.vmsa));
>   
> +skip_vmsa_free:
>   	if (svm->sev_es.ghcb_sa_free)
>   		kvfree(svm->sev_es.ghcb_sa);
>   }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9b36b07414ea..5a4662716b6a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1814,6 +1814,7 @@ enum sev_cmd_id {
>   	KVM_SEV_SNP_INIT,
>   	KVM_SEV_SNP_LAUNCH_START,
>   	KVM_SEV_SNP_LAUNCH_UPDATE,
> +	KVM_SEV_SNP_LAUNCH_FINISH,
>   
>   	KVM_SEV_NR_MAX,
>   };
> @@ -1948,6 +1949,19 @@ struct kvm_sev_snp_launch_update {
>   	__u8 vmpl1_perms;
>   };
>   
> +#define KVM_SEV_SNP_ID_BLOCK_SIZE	96
> +#define KVM_SEV_SNP_ID_AUTH_SIZE	4096
> +#define KVM_SEV_SNP_FINISH_DATA_SIZE	32
> +
> +struct kvm_sev_snp_launch_finish {
> +	__u64 id_block_uaddr;
> +	__u64 id_auth_uaddr;
> +	__u8 id_block_en;
> +	__u8 auth_key_en;
> +	__u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
> +	__u8 pad[6];
> +};
> +
>   #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
>   #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
>   #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)

Patch

diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index 62abd5c1f72b..750162cff87b 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -514,6 +514,28 @@  Returns: 0 on success, -negative on error
 See the SEV-SNP spec for further details on how to build the VMPL permission
 mask and page type.
 
+21. KVM_SNP_LAUNCH_FINISH
+-------------------------
+
+After completion of the SNP guest launch flow, the KVM_SNP_LAUNCH_FINISH command can be
+issued to make the guest ready for the execution.
+
+Parameters (in): struct kvm_sev_snp_launch_finish
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_snp_launch_finish {
+                __u64 id_block_uaddr;
+                __u64 id_auth_uaddr;
+                __u8 id_block_en;
+                __u8 auth_key_en;
+                __u8 host_data[32];
+        };
+
+
+See SEV-SNP specification for further details on launch finish input parameters.
 
 References
 ==========
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a9461d352eda..a5b90469683f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2095,6 +2095,106 @@  static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	return ret;
 }
 
+static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_data_snp_launch_update data = {};
+	int i, ret;
+
+	data.gctx_paddr = __psp_pa(sev->snp_context);
+	data.page_type = SNP_PAGE_TYPE_VMSA;
+
+	for (i = 0; i < kvm->created_vcpus; i++) {
+		struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));
+		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
+
+		/* Perform some pre-encryption checks against the VMSA */
+		ret = sev_es_sync_vmsa(svm);
+		if (ret)
+			return ret;
+
+		/* Transition the VMSA page to a firmware state. */
+		ret = rmp_make_private(pfn, -1, PG_LEVEL_4K, sev->asid, true);
+		if (ret)
+			return ret;
+
+		/* Issue the SNP command to encrypt the VMSA */
+		data.address = __sme_pa(svm->sev_es.vmsa);
+		ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
+				      &data, &argp->error);
+		if (ret) {
+			snp_page_reclaim(pfn);
+			return ret;
+		}
+
+		svm->vcpu.arch.guest_state_protected = true;
+	}
+
+	return 0;
+}
+
+static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	struct sev_data_snp_launch_finish *data;
+	void *id_block = NULL, *id_auth = NULL;
+	struct kvm_sev_snp_launch_finish params;
+	int ret;
+
+	if (!sev_snp_guest(kvm))
+		return -ENOTTY;
+
+	if (!sev->snp_context)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+		return -EFAULT;
+
+	/* Measure all vCPUs using LAUNCH_UPDATE before we finalize the launch flow. */
+	ret = snp_launch_update_vmsa(kvm, argp);
+	if (ret)
+		return ret;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
+	if (!data)
+		return -ENOMEM;
+
+	if (params.id_block_en) {
+		id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE);
+		if (IS_ERR(id_block)) {
+			ret = PTR_ERR(id_block);
+			goto e_free;
+		}
+
+		data->id_block_en = 1;
+		data->id_block_paddr = __sme_pa(id_block);
+	}
+
+	if (params.auth_key_en) {
+		id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE);
+		if (IS_ERR(id_auth)) {
+			ret = PTR_ERR(id_auth);
+			goto e_free_id_block;
+		}
+
+		data->auth_key_en = 1;
+		data->id_auth_paddr = __sme_pa(id_auth);
+	}
+
+	data->gctx_paddr = __psp_pa(sev->snp_context);
+	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
+
+	kfree(id_auth);
+
+e_free_id_block:
+	kfree(id_block);
+
+e_free:
+	kfree(data);
+
+	return ret;
+}
+
 int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_sev_cmd sev_cmd;
@@ -2191,6 +2291,9 @@  int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 	case KVM_SEV_SNP_LAUNCH_UPDATE:
 		r = snp_launch_update(kvm, &sev_cmd);
 		break;
+	case KVM_SEV_SNP_LAUNCH_FINISH:
+		r = snp_launch_finish(kvm, &sev_cmd);
+		break;
 	default:
 		r = -EINVAL;
 		goto out;
@@ -2696,11 +2799,27 @@  void sev_free_vcpu(struct kvm_vcpu *vcpu)
 
 	svm = to_svm(vcpu);
 
+	/*
+	 * If its an SNP guest, then VMSA was added in the RMP entry as
+	 * a guest owned page. Transition the page to hypervisor state
+	 * before releasing it back to the system.
+	 * Also the page is removed from the kernel direct map, so flush it
+	 * later after it is transitioned back to hypervisor state and
+	 * restored in the direct map.
+	 */
+	if (sev_snp_guest(vcpu->kvm)) {
+		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
+
+		if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false))
+			goto skip_vmsa_free;
+	}
+
 	if (vcpu->arch.guest_state_protected)
 		sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
 
 	__free_page(virt_to_page(svm->sev_es.vmsa));
 
+skip_vmsa_free:
 	if (svm->sev_es.ghcb_sa_free)
 		kvfree(svm->sev_es.ghcb_sa);
 }
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9b36b07414ea..5a4662716b6a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1814,6 +1814,7 @@  enum sev_cmd_id {
 	KVM_SEV_SNP_INIT,
 	KVM_SEV_SNP_LAUNCH_START,
 	KVM_SEV_SNP_LAUNCH_UPDATE,
+	KVM_SEV_SNP_LAUNCH_FINISH,
 
 	KVM_SEV_NR_MAX,
 };
@@ -1948,6 +1949,19 @@  struct kvm_sev_snp_launch_update {
 	__u8 vmpl1_perms;
 };
 
+#define KVM_SEV_SNP_ID_BLOCK_SIZE	96
+#define KVM_SEV_SNP_ID_AUTH_SIZE	4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE	32
+
+struct kvm_sev_snp_launch_finish {
+	__u64 id_block_uaddr;
+	__u64 id_auth_uaddr;
+	__u8 id_block_en;
+	__u8 auth_key_en;
+	__u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+	__u8 pad[6];
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)