
[7/9] i386/sev: Refactor setting of reset vector and initial CPU state

Message ID bf045e3364a0575d9680832dec5acba7aa441895.1709044754.git.roy.hopkins@suse.com (mailing list archive)
State New, archived
Series Introduce support for IGVM files

Commit Message

Roy Hopkins Feb. 27, 2024, 2:50 p.m. UTC
When an SEV guest is started, the reset vector and state are
extracted from metadata that is contained in the firmware volume.

In preparation for using IGVM to set up the initial CPU state,
the code has been refactored to populate vmcb_save_area for each
CPU which is then applied during guest startup and CPU reset.

Signed-off-by: Roy Hopkins <roy.hopkins@suse.com>
---
 target/i386/sev.c | 288 +++++++++++++++++++++++++++++++++++++++++-----
 target/i386/sev.h | 110 ++++++++++++++++++
 2 files changed, 369 insertions(+), 29 deletions(-)

Comments

Daniel P. Berrangé March 1, 2024, 5:01 p.m. UTC | #1
On Tue, Feb 27, 2024 at 02:50:13PM +0000, Roy Hopkins wrote:
> When an SEV guest is started, the reset vector and state are
> extracted from metadata that is contained in the firmware volume.
> 
> In preparation for using IGVM to set up the initial CPU state,
> the code has been refactored to populate vmcb_save_area for each
> CPU which is then applied during guest startup and CPU reset.
> 
> Signed-off-by: Roy Hopkins <roy.hopkins@suse.com>
> ---
>  target/i386/sev.c | 288 +++++++++++++++++++++++++++++++++++++++++-----
>  target/i386/sev.h | 110 ++++++++++++++++++
>  2 files changed, 369 insertions(+), 29 deletions(-)
> 
> diff --git a/target/i386/sev.c b/target/i386/sev.c
> index 173de91afe..d6902432fd 100644
> --- a/target/i386/sev.c
> +++ b/target/i386/sev.c
> @@ -74,9 +74,7 @@ struct SevGuestState {
>      SevState state;
>      gchar *measurement;
>  
> -    uint32_t reset_cs;
> -    uint32_t reset_ip;
> -    bool reset_data_valid;
> +    QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa;
>  };
>  
>  #define DEFAULT_GUEST_POLICY    0x1 /* disable debug */
> @@ -99,6 +97,12 @@ typedef struct QEMU_PACKED SevHashTableDescriptor {
>  /* hard code sha256 digest size */
>  #define HASH_SIZE 32
>  
> +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */
> +#define FLAGS_VMSA_TO_SEGCACHE(flags) \
> +    ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8))
> +#define FLAGS_SEGCACHE_TO_VMSA(flags) \
> +    ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12))
> +
>  typedef struct QEMU_PACKED SevHashTableEntry {
>      QemuUUID guid;
>      uint16_t len;
> @@ -125,6 +129,15 @@ typedef struct QEMU_PACKED PaddedSevHashTable {
>  QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0);
>  
>  static SevGuestState *sev_guest;
> +
> +typedef struct SevLaunchVmsa {
> +    QTAILQ_ENTRY(SevLaunchVmsa) next;
> +
> +    uint16_t cpu_index;
> +    uint64_t gpa;
> +    struct sev_es_save_area vmsa;
> +} SevLaunchVmsa;
> +
>  static Error *sev_mig_blocker;
>  
>  static const char *const sev_fw_errlist[] = {
> @@ -291,6 +304,149 @@ sev_guest_finalize(Object *obj)
>  {
>  }
>  
> +static void sev_apply_cpu_context(CPUState *cpu)
> +{
> +    SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
> +    X86CPU *x86;
> +    CPUX86State *env;
> +    struct SevLaunchVmsa *launch_vmsa;
> +
> +    /* See if an initial VMSA has been provided for this CPU */
> +    QTAILQ_FOREACH(launch_vmsa, &sev_guest->launch_vmsa, next)
> +    {
> +        if (cpu->cpu_index == launch_vmsa->cpu_index) {
> +            x86 = X86_CPU(cpu);
> +            env = &x86->env;
> +
> +            /*
> +             * Ideally we would provide the VMSA directly to kvm which would
> +             * ensure that the resulting initial VMSA measurement which is
> +             * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from
> +             * exactly what we provide here. Currently this is not possible so
> +             * we need to copy the parts of the VMSA structure that we currently
> +             * support into the CPU state.
> +             */

This sounds like it is saying that the code is not honouring
everything in the VMSA defined by the IGVM file?

If so, that is pretty awkward. The VMSA is effectively an external
ABI between QEMU and the guest owner (or whatever is validating
guest attestation reports for them), and thus predictability and
stability of this over time is critical.

We don't want the attestation process to be dependent/variable on
the particular QEMU/KVM version, because any upgrade to QEMU/KVM
could then alter the effective VMSA that the guest owner sees.

We've already suffered pain in this respect not long ago when the
kernel arbitrarily changed a default setting which altered the
VMSA it exposed, breaking existing apps that validate attestation.

What will it take to provide the full VMSA to KVM, so that we can
guarantee to the guest owner that the VMSA for the guest is going
to perfectly match what their IGVM defined?

> +            cpu_load_efer(env, launch_vmsa->vmsa.efer);
> +            cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4);
> +            cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0);
> +            cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3);
> +
> +            cpu_x86_load_seg_cache(
> +                env, R_CS, launch_vmsa->vmsa.cs.selector,
> +                launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit,
> +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib));
> +            cpu_x86_load_seg_cache(
> +                env, R_DS, launch_vmsa->vmsa.ds.selector,
> +                launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit,
> +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib));
> +            cpu_x86_load_seg_cache(
> +                env, R_ES, launch_vmsa->vmsa.es.selector,
> +                launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit,
> +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib));
> +            cpu_x86_load_seg_cache(
> +                env, R_FS, launch_vmsa->vmsa.fs.selector,
> +                launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit,
> +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib));
> +            cpu_x86_load_seg_cache(
> +                env, R_GS, launch_vmsa->vmsa.gs.selector,
> +                launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit,
> +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib));
> +            cpu_x86_load_seg_cache(
> +                env, R_SS, launch_vmsa->vmsa.ss.selector,
> +                launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit,
> +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib));
> +
> +            env->gdt.base = launch_vmsa->vmsa.gdtr.base;
> +            env->gdt.limit = launch_vmsa->vmsa.gdtr.limit;
> +            env->idt.base = launch_vmsa->vmsa.idtr.base;
> +            env->idt.limit = launch_vmsa->vmsa.idtr.limit;
> +
> +            env->regs[R_EAX] = launch_vmsa->vmsa.rax;
> +            env->regs[R_ECX] = launch_vmsa->vmsa.rcx;
> +            env->regs[R_EDX] = launch_vmsa->vmsa.rdx;
> +            env->regs[R_EBX] = launch_vmsa->vmsa.rbx;
> +            env->regs[R_ESP] = launch_vmsa->vmsa.rsp;
> +            env->regs[R_EBP] = launch_vmsa->vmsa.rbp;
> +            env->regs[R_ESI] = launch_vmsa->vmsa.rsi;
> +            env->regs[R_EDI] = launch_vmsa->vmsa.rdi;
> +#ifdef TARGET_X86_64
> +            env->regs[R_R8] = launch_vmsa->vmsa.r8;
> +            env->regs[R_R9] = launch_vmsa->vmsa.r9;
> +            env->regs[R_R10] = launch_vmsa->vmsa.r10;
> +            env->regs[R_R11] = launch_vmsa->vmsa.r11;
> +            env->regs[R_R12] = launch_vmsa->vmsa.r12;
> +            env->regs[R_R13] = launch_vmsa->vmsa.r13;
> +            env->regs[R_R14] = launch_vmsa->vmsa.r14;
> +            env->regs[R_R15] = launch_vmsa->vmsa.r15;
> +#endif
> +            env->eip = launch_vmsa->vmsa.rip;
> +            break;
> +        }
> +    }
> +}


With regards,
Daniel
Roy Hopkins March 12, 2024, 3:45 p.m. UTC | #2
On Fri, 2024-03-01 at 17:01 +0000, Daniel P. Berrangé wrote:
> On Tue, Feb 27, 2024 at 02:50:13PM +0000, Roy Hopkins wrote:
> > When an SEV guest is started, the reset vector and state are
> > extracted from metadata that is contained in the firmware volume.
> > 
> > In preparation for using IGVM to set up the initial CPU state,
> > the code has been refactored to populate vmcb_save_area for each
> > CPU which is then applied during guest startup and CPU reset.
> > 
> > Signed-off-by: Roy Hopkins <roy.hopkins@suse.com>
> > ---
> >  target/i386/sev.c | 288 +++++++++++++++++++++++++++++++++++++++++-----
> >  target/i386/sev.h | 110 ++++++++++++++++++
> >  2 files changed, 369 insertions(+), 29 deletions(-)
> > 
> > diff --git a/target/i386/sev.c b/target/i386/sev.c
> > index 173de91afe..d6902432fd 100644
> > --- a/target/i386/sev.c
> > +++ b/target/i386/sev.c
> > @@ -74,9 +74,7 @@ struct SevGuestState {
> >      SevState state;
> >      gchar *measurement;
> >  
> > -    uint32_t reset_cs;
> > -    uint32_t reset_ip;
> > -    bool reset_data_valid;
> > +    QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa;
> >  };
> >  
> >  #define DEFAULT_GUEST_POLICY    0x1 /* disable debug */
> > @@ -99,6 +97,12 @@ typedef struct QEMU_PACKED SevHashTableDescriptor {
> >  /* hard code sha256 digest size */
> >  #define HASH_SIZE 32
> >  
> > +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */
> > +#define FLAGS_VMSA_TO_SEGCACHE(flags) \
> > +    ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8))
> > +#define FLAGS_SEGCACHE_TO_VMSA(flags) \
> > +    ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12))
> > +
> >  typedef struct QEMU_PACKED SevHashTableEntry {
> >      QemuUUID guid;
> >      uint16_t len;
> > @@ -125,6 +129,15 @@ typedef struct QEMU_PACKED PaddedSevHashTable {
> >  QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0);
> >  
> >  static SevGuestState *sev_guest;
> > +
> > +typedef struct SevLaunchVmsa {
> > +    QTAILQ_ENTRY(SevLaunchVmsa) next;
> > +
> > +    uint16_t cpu_index;
> > +    uint64_t gpa;
> > +    struct sev_es_save_area vmsa;
> > +} SevLaunchVmsa;
> > +
> >  static Error *sev_mig_blocker;
> >  
> >  static const char *const sev_fw_errlist[] = {
> > @@ -291,6 +304,149 @@ sev_guest_finalize(Object *obj)
> >  {
> >  }
> >  
> > +static void sev_apply_cpu_context(CPUState *cpu)
> > +{
> > +    SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
> > +    X86CPU *x86;
> > +    CPUX86State *env;
> > +    struct SevLaunchVmsa *launch_vmsa;
> > +
> > +    /* See if an initial VMSA has been provided for this CPU */
> > +    QTAILQ_FOREACH(launch_vmsa, &sev_guest->launch_vmsa, next)
> > +    {
> > +        if (cpu->cpu_index == launch_vmsa->cpu_index) {
> > +            x86 = X86_CPU(cpu);
> > +            env = &x86->env;
> > +
> > +            /*
> > +             * Ideally we would provide the VMSA directly to kvm which would
> > +             * ensure that the resulting initial VMSA measurement which is
> > +             * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from
> > +             * exactly what we provide here. Currently this is not possible so
> > +             * we need to copy the parts of the VMSA structure that we currently
> > +             * support into the CPU state.
> > +             */
> 
> This sounds like it is saying that the code is not honouring
> everything in the VMSA defined by the IGVM file?
> 
> If so, that is pretty awkward. The VMSA is effectively an external
> ABI between QEMU and the guest owner (or whatever is validating
> guest attestation reports for them), and thus predictability and
> stability of this over time is critical.
> 
> We don't want the attestation process to be dependent/variable on
> the particular QEMU/KVM version, because any upgrade to QEMU/KVM
> could then alter the effective VMSA that the guest owner sees.
> 
> We've already suffered pain in this respect not long ago when the
> kernel arbitrarily changed a default setting which altered the
> VMSA it exposed, breaking existing apps that validate attestation.
> 
> What will it take to provide the full VMSA to KVM, so that we can
> guarantee to the guest owner that the VMSA for the guest is going
> to perfectly match what their IGVM defined?
> 

Yes, the fact that we have to copy the individual fields from the VMSA to
"CPUX86State" is less than ideal - a problem made worse by the fact that the
kernel does not allow direct control over some of the fields from userspace,
"sev_features" being a good example here where "SVM_SEV_FEAT_DEBUG_SWAP" is
unconditionally added by the kernel.

The kernel VMSA is at least predictable. So, although we cannot yet allow full
flexibility in providing a complete VMSA from QEMU and guarantee it will be
honoured, we could check to see if any settings conflict with those imposed by
the kernel and exit with an error if this is the case. I chose not to implement
for this first series but could easily add a patch to support this. The problem
here is that it ties the version of QEMU to VMSA handling functionality in the
kernel. Any change to the VMSA handling in the kernel would potentially
invalidate the checks in QEMU. The one upside here is that this will easily be
detectable by the attestation measurement not matching the expected measurement
of the IGVM file. But it will be difficult for the user to determine what the
discrepancy is.
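
An illustrative sketch of the kind of conflict check described above (not part of
the posted series): compare the sev_features field of an IGVM-provided VMSA
against a mask of bits the host kernel is known to force on. The helper name and
the locally defined SVM_SEV_FEAT_DEBUG_SWAP constant are assumptions for the
example; only struct sev_es_save_area and error_report() come from the
surrounding code.

/*
 * Sketch only, not part of the posted series. SVM_SEV_FEAT_DEBUG_SWAP is
 * defined locally for the example (DebugSwap is bit 5 of SEV_FEATURES per
 * the APM); the kernel-forced mask is passed in by the caller because
 * there is currently no interface to query it.
 */
#define SVM_SEV_FEAT_DEBUG_SWAP   (1ULL << 5)

static int sev_check_launch_vmsa_features(const struct sev_es_save_area *vmsa,
                                          uint64_t kernel_forced_features)
{
    uint64_t missing = kernel_forced_features & ~vmsa->sev_features;

    if (missing) {
        error_report("SEV: IGVM-provided sev_features 0x%" PRIx64
                     " omit bits 0x%" PRIx64 " that the host kernel sets "
                     "unconditionally; the launch measurement would not "
                     "match the IGVM file",
                     vmsa->sev_features, missing);
        return 1;
    }
    return 0;
}

The awkward part is exactly the coupling described above: kernel_forced_features
would have to be hardcoded per kernel version (for example
SVM_SEV_FEAT_DEBUG_SWAP on kernels that still force DebugSwap), because there is
no interface to query it.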

The ideal solution is to add or modify a KVM ioctl to allow the VMSA to be set
directly, overriding the state in "CPUX86State". The current
KVM_SEV_LAUNCH_UPDATE_VMSA ioctl triggers the synchronisation of the VMSA but
does not allow it to be specified directly. This could be modified for what we
need. The SEV-SNP kernel patches add KVM_SEV_SNP_LAUNCH_UPDATE which allows a
page type of VMSA to be updated, although the current patch series does not
support using this to set the initial state of the VMSA:
https://lore.kernel.org/lkml/20231230172351.574091-19-michael.roth@amd.com/ I
have experimented with this myself and have successfully modified the SEV-SNP
kernel patches to support directly setting the VMSA from QEMU.
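
As a purely hypothetical sketch of what the QEMU side of such an interface could
look like: KVM_SEV_LAUNCH_SET_VMSA, its command id and struct
kvm_sev_launch_set_vmsa are inventions for illustration and do not exist in
upstream KVM; only sev_ioctl(), SevGuestState and SevLaunchVmsa come from this
patch.

/*
 * Hypothetical sketch: KVM_SEV_LAUNCH_SET_VMSA and struct
 * kvm_sev_launch_set_vmsa are NOT upstream KVM interfaces. They stand in
 * for the kind of ioctl discussed above that would let QEMU hand the raw
 * VMSA page to KVM so that exactly these bytes end up in the launch
 * measurement, instead of synchronising them through CPUX86State.
 */
#define KVM_SEV_LAUNCH_SET_VMSA  255    /* made-up command id */

struct kvm_sev_launch_set_vmsa {
    uint32_t vcpu_id;   /* vCPU this VMSA belongs to */
    uint32_t pad;
    uint64_t gpa;       /* guest physical address of the VMSA page */
    uint64_t uaddr;     /* userspace address of the VMSA image */
    uint64_t len;       /* sizeof(struct sev_es_save_area) */
};

static int sev_launch_set_vmsa_direct(SevGuestState *sev,
                                      const SevLaunchVmsa *launch_vmsa)
{
    struct kvm_sev_launch_set_vmsa input = {
        .vcpu_id = launch_vmsa->cpu_index,
        .gpa = launch_vmsa->gpa,
        .uaddr = (uintptr_t)&launch_vmsa->vmsa,
        .len = sizeof(launch_vmsa->vmsa),
    };
    int fw_error;

    /* KVM would measure these bytes verbatim into the launch digest. */
    return sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_SET_VMSA, &input, &fw_error);
}

The point of the sketch is only that KVM would receive, and measure, the VMSA
bytes verbatim, removing CPUX86State from the attestation path.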

On the other hand, I have also verified that I can indeed measure an IGVM file
loaded using the VMSA synchronisation method currently employed and get a
matching measurement from the SEV attestation report.

What would you suggest is the best way forward for this?

> > +            cpu_load_efer(env, launch_vmsa->vmsa.efer);
> > +            cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4);
> > +            cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0);
> > +            cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3);
> > +
> > +            cpu_x86_load_seg_cache(
> > +                env, R_CS, launch_vmsa->vmsa.cs.selector,
> > +                launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit,
> > +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib));
> > +            cpu_x86_load_seg_cache(
> > +                env, R_DS, launch_vmsa->vmsa.ds.selector,
> > +                launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit,
> > +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib));
> > +            cpu_x86_load_seg_cache(
> > +                env, R_ES, launch_vmsa->vmsa.es.selector,
> > +                launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit,
> > +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib));
> > +            cpu_x86_load_seg_cache(
> > +                env, R_FS, launch_vmsa->vmsa.fs.selector,
> > +                launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit,
> > +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib));
> > +            cpu_x86_load_seg_cache(
> > +                env, R_GS, launch_vmsa->vmsa.gs.selector,
> > +                launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit,
> > +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib));
> > +            cpu_x86_load_seg_cache(
> > +                env, R_SS, launch_vmsa->vmsa.ss.selector,
> > +                launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit,
> > +                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib));
> > +
> > +            env->gdt.base = launch_vmsa->vmsa.gdtr.base;
> > +            env->gdt.limit = launch_vmsa->vmsa.gdtr.limit;
> > +            env->idt.base = launch_vmsa->vmsa.idtr.base;
> > +            env->idt.limit = launch_vmsa->vmsa.idtr.limit;
> > +
> > +            env->regs[R_EAX] = launch_vmsa->vmsa.rax;
> > +            env->regs[R_ECX] = launch_vmsa->vmsa.rcx;
> > +            env->regs[R_EDX] = launch_vmsa->vmsa.rdx;
> > +            env->regs[R_EBX] = launch_vmsa->vmsa.rbx;
> > +            env->regs[R_ESP] = launch_vmsa->vmsa.rsp;
> > +            env->regs[R_EBP] = launch_vmsa->vmsa.rbp;
> > +            env->regs[R_ESI] = launch_vmsa->vmsa.rsi;
> > +            env->regs[R_EDI] = launch_vmsa->vmsa.rdi;
> > +#ifdef TARGET_X86_64
> > +            env->regs[R_R8] = launch_vmsa->vmsa.r8;
> > +            env->regs[R_R9] = launch_vmsa->vmsa.r9;
> > +            env->regs[R_R10] = launch_vmsa->vmsa.r10;
> > +            env->regs[R_R11] = launch_vmsa->vmsa.r11;
> > +            env->regs[R_R12] = launch_vmsa->vmsa.r12;
> > +            env->regs[R_R13] = launch_vmsa->vmsa.r13;
> > +            env->regs[R_R14] = launch_vmsa->vmsa.r14;
> > +            env->regs[R_R15] = launch_vmsa->vmsa.r15;
> > +#endif
> > +            env->eip = launch_vmsa->vmsa.rip;
> > +            break;
> > +        }
> > +    }
> > +}
> 
> 
> With regards,
> Daniel

Regards,
Roy
Daniel P. Berrangé March 12, 2024, 4:12 p.m. UTC | #3
On Tue, Mar 12, 2024 at 03:45:20PM +0000, Roy Hopkins wrote:
> On Fri, 2024-03-01 at 17:01 +0000, Daniel P. Berrangé wrote:
> > On Tue, Feb 27, 2024 at 02:50:13PM +0000, Roy Hopkins wrote:
> > > +            /*
> > > +             * Ideally we would provide the VMSA directly to kvm which would
> > > +             * ensure that the resulting initial VMSA measurement which is
> > > +             * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from
> > > +             * exactly what we provide here. Currently this is not possible so
> > > +             * we need to copy the parts of the VMSA structure that we currently
> > > +             * support into the CPU state.
> > > +             */
> > 
> > This sounds like it is saying that the code is not honouring
> > everything in the VMSA defined by the IGVM file?
> > 
> > If so, that is pretty awkward. The VMSA is effectively an external
> > ABI between QEMU and the guest owner (or whatever is validating
> > guest attestation reports for them), and thus predictability and
> > stability of this over time is critical.
> > 
> > We don't want the attestation process to be dependent/variable on
> > the particular QEMU/KVM version, because any upgrade to QEMU/KVM
> > could then alter the effective VMSA that the guest owner sees.
> > 
> > We've already suffered pain in this respect not long ago when the
> > kernel arbitrarily changed a default setting which altered the
> > VMSA it exposed, breaking existing apps that validate attestation.
> > 
> > What will it take to provide the full VMSA to KVM, so that we can
> > guarantee to the guest owner that the VMSA for the guest is going
> > to perfectly match what their IGVM defined?
> > 
> 
> Yes, the fact that we have to copy the individual fields from the VMSA to
> "CPUX86State" is less than ideal - a problem made worse by the fact that the
> kernel does not allow direct control over some of the fields from userspace,
> "sev_features" being a good example here where "SVM_SEV_FEAT_DEBUG_SWAP" is
> unconditionally added by the kernel.

Ah yes, the SVM_SEV_FEAT_DEBUG_SWAP feature is the one I couldn't remember
the name of in my quoted text above, that broke our apps when the kernel
suddenly set it by default (thankfully now reverted in Linux with
5abf6dceb066f2b02b225fd561440c98a8062681).

> The kernel VMSA is at least predictable. So, although we cannot yet allow full
> flexibility in providing a complete VMSA from QEMU and guarantee it will be
> honoured, we could check to see if any settings conflict with those imposed by
> the kernel and exit with an error if this is the case. I chose not to implement
> for this first series but could easily add a patch to support this. The problem
> here is that it ties the version of QEMU to VMSA handling functionality in the
> kernel. Any change to the VMSA handling in the kernel would potentially
> invalidate the checks in QEMU. The one upside here is that this will easily be
> detectable by the attestation measurement not matching the expected measurement
> of the IGVM file. But it will be difficult for the user to determine what the
> discrepancy is.

Yes, the difficulty in diagnosis is the big thing I'm worried about from
a distro supportability POV. The DEBUG_SWAP issue caused us a bunch of
pain and that's before CVMs are even widely used.

I agree that hardcoding checks in QEMU is pretty unpleasant, and probably
not something that I'd want us to do. I'd want QEMU to be able to live
query the kernel's default initial VMSA, if it were to be reporting
differences vs the IGVM provided VMSA. I don't think there's a way to
do that nicely though - I only know of ftrace probes to dump it informally.

I guess if we know & document what subset of the VMSA QEMU /can/ directly
control, that at least narrows down where to look if something does change
or go wrong.

> The ideal solution is to add or modify a KVM ioctl to allow the VMSA to be set
> directly, overriding the state in "CPUX86State". The current
> KVM_SEV_LAUNCH_UPDATE_VMSA ioctl triggers the synchronisation of the VMSA but
> does not allow it to be specified directly. This could be modified for what we
> need. The SEV-SNP kernel patches add KVM_SEV_SNP_LAUNCH_UPDATE which allows a
> page type of VMSA to be updated, although the current patch series does not
> support using this to set the initial state of the VMSA:
> https://lore.kernel.org/lkml/20231230172351.574091-19-michael.roth@amd.com/ I
> have experimented with this myself and have successfully modified the SEV-SNP
> kernel patches to support directly setting the VMSA from QEMU.
> 
> On the other hand, I have also verified that I can indeed measure an IGVM file
> loaded using the VMSA synchronisation method currently employed and get a
> matching measurement from the SEV attestation report.
> 
> What would you suggest is the best way forward for this?

I'll delegate to Paolo for an opinion on the possibility of new (or
updated) ioctls to provide the full VMSA data.

If we can't directly set the full VMSA, then the next best option is a
more formal way to query the VMSA. That way libvirt could report on
what the default initial kernel VMSA state is, which could be useful
debug info for any bug reports.

With regards,
Daniel
Roy Hopkins March 18, 2024, 11:49 a.m. UTC | #4
On Tue, 2024-03-12 at 16:12 +0000, Daniel P. Berrangé wrote:
> On Tue, Mar 12, 2024 at 03:45:20PM +0000, Roy Hopkins wrote:
> > On Fri, 2024-03-01 at 17:01 +0000, Daniel P. Berrangé wrote:
> > > On Tue, Feb 27, 2024 at 02:50:13PM +0000, Roy Hopkins wrote:
> > > > +            /*
> > > > +             * Ideally we would provide the VMSA directly to kvm which would
> > > > +             * ensure that the resulting initial VMSA measurement which is
> > > > +             * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from
> > > > +             * exactly what we provide here. Currently this is not possible so
> > > > +             * we need to copy the parts of the VMSA structure that we currently
> > > > +             * support into the CPU state.
> > > > +             */
> > > 
> > > This sounds like it is saying that the code is not honouring
> > > everything in the VMSA defined by the IGVM file?
> > > 
> > > If so, that is pretty awkward. The VMSA is effectively an external
> > > ABI between QEMU and the guest owner (or whatever is validating
> > > guest attestation reports for them), and thus predictability and
> > > stability of this over time is critical.
> > > 
> > > We don't want the attestation process to be dependent/variable on
> > > the particular QEMU/KVM version, because any upgrade to QEMU/KVM
> > > could then alter the effective VMSA that the guest owner sees.
> > > 
> > > We've already suffered pain in this respect not long ago when the
> > > kernel arbitrarily changed a default setting which altered the
> > > VMSA it exposed, breaking existing apps that validate attestation.
> > > 
> > > What will it take to provide the full VMSA to KVM, so that we can
> > > guarantee to the guest owner that the VMSA for the guest is going
> > > to perfectly match what their IGVM defined?
> > > 
> > 
> > Yes, the fact that we have to copy the individual fields from the VMSA to
> > "CPUX86State" is less than ideal - a problem made worse by the fact that the
> > kernel does not allow direct control over some of the fields from userspace,
> > "sev_features" being a good example here where "SVM_SEV_FEAT_DEBUG_SWAP" is
> > unconditionally added by the kernel.
> 
> Ah yes, the SVM_SEV_FEAT_DEBUG_SWAP feature is the one I couldn't remember
> the name of in my quoted text above, that broke our apps when the kernel
> suddenly set it by default (thankfully now reverted in Linux with
> 5abf6dceb066f2b02b225fd561440c98a8062681).
> 
> > The kernel VMSA is at least predictable. So, although we cannot yet allow full
> > flexibility in providing a complete VMSA from QEMU and guarantee it will be
> > honoured, we could check to see if any settings conflict with those imposed by
> > the kernel and exit with an error if this is the case. I chose not to implement
> > for this first series but could easily add a patch to support this. The problem
> > here is that it ties the version of QEMU to VMSA handling functionality in the
> > kernel. Any change to the VMSA handling in the kernel would potentially
> > invalidate the checks in QEMU. The one upside here is that this will easily be
> > detectable by the attestation measurement not matching the expected measurement
> > of the IGVM file. But it will be difficult for the user to determine what the
> > discrepancy is.
> 
> Yes, the difficulty in diagnosis is the big thing I'm worried about from
> a distro supportability POV. The DEBUG_SWAP issue caused us a bunch of
> pain and that's before CVMs are even widely used.
> 
> I agree that hardcoding checks in QEMU is pretty unpleasant, and probably
> not something that I'd want us to do. I'd want QEMU to be able to live
> query the kernel's default initial VMSA, if it were to be reporting
> differences vs the IGVM provided VMSA. I don't think there's a way to
> do that nicely though - I only know of ftrace probes to dump it informally.
> 
> I guess if we know & document what subset of the VMSA QEMU /can/ directly
> control, that at least narrows down where to look if something does change
> or go wrong.
> 
Yes, it makes sense to document the subset that can be reliably set by QEMU,
along with any modifications made by the kernel. Perhaps I should go one step
further and check that the VMSA does not contain any entries beyond what is
copied in "sev_apply_cpu_context()"? If any field other than those explicitly
copied by this function contains a non-zero value, an error would be generated.
As you suggest, this would limit the scope of any measurement differences to the
documented subset.
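
A minimal sketch of that check (again, not part of the posted series): copy the
provided VMSA, clear the fields that sev_apply_cpu_context() honours, and require
everything that remains to be zero. The helper name is made up for the example
and the cleared list would have to track sev_apply_cpu_context() exactly; the
struct types and error_report() come from the patch.

/*
 * Sketch only: reject an IGVM-provided VMSA that populates state beyond
 * the subset that sev_apply_cpu_context() copies into CPUX86State.
 */
static int sev_check_unsupported_vmsa_fields(const struct sev_es_save_area *vmsa)
{
    struct sev_es_save_area copy = *vmsa;
    const struct vmcb_seg zero_seg = {0};
    const uint8_t *bytes = (const uint8_t *)&copy;
    size_t i;

    /* Clear everything that sev_apply_cpu_context() currently honours. */
    copy.cs = zero_seg;
    copy.ds = zero_seg;
    copy.es = zero_seg;
    copy.fs = zero_seg;
    copy.gs = zero_seg;
    copy.ss = zero_seg;
    copy.gdtr = zero_seg;
    copy.idtr = zero_seg;
    copy.efer = copy.cr0 = copy.cr3 = copy.cr4 = 0;
    copy.rax = copy.rbx = copy.rcx = copy.rdx = 0;
    copy.rsp = copy.rbp = copy.rsi = copy.rdi = 0;
    copy.r8 = copy.r9 = copy.r10 = copy.r11 = 0;
    copy.r12 = copy.r13 = copy.r14 = copy.r15 = 0;
    copy.rip = 0;

    /* Any byte still set is state that QEMU cannot guarantee to honour. */
    for (i = 0; i < sizeof(copy); i++) {
        if (bytes[i]) {
            error_report("SEV: unsupported non-zero VMSA field at offset 0x%zx",
                         i);
            return 1;
        }
    }
    return 0;
}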

> > The ideal solution is to add or modify a KVM ioctl to allow the VMSA to be set
> > directly, overriding the state in "CPUX86State". The current
> > KVM_SEV_LAUNCH_UPDATE_VMSA ioctl triggers the synchronisation of the VMSA but
> > does not allow it to be specified directly. This could be modified for what we
> > need. The SEV-SNP kernel patches add KVM_SEV_SNP_LAUNCH_UPDATE which allows a
> > page type of VMSA to be updated, although the current patch series does not
> > support using this to set the initial state of the VMSA:
> > https://lore.kernel.org/lkml/20231230172351.574091-19-michael.roth@amd.com/ I
> > have experimented with this myself and have successfully modified the SEV-SNP
> > kernel patches to support directly setting the VMSA from QEMU.
> > 
> > On the other hand, I have also verified that I can indeed measure an IGVM file
> > loaded using the VMSA synchronisation method currently employed and get a
> > matching measurement from the SEV attestation report.
> > 
> > What would you suggest is the best way forward for this?
> 
> I'll delegate to Paolo for an opinion on the possibility of new (or
> updated) ioctls to provide the full VMSA data.
> 
> If we can't directly set the full VMSA, then the next best option is a
> more formal way to query the VMSA. That way libvirt could report on
> what the default initial kernel VMSA state is, which could be useful
> debug info for any bug reports.
Setting the full VMSA definitely seems like the right option here. Querying the
VMSA that was actually measured would obviously give us the ability to diagnose
problems with the measurement but does not allow full compatibility with the
IGVM specification. This will potentially restrict the types of guests that can
be packaged using IGVM.

Another thing to bear in mind is that with the incoming host kernel support for
SEV-SNP, there are more constraints on how the VMSA is measured and populated.
In particular, the current patches for SEV-SNP automatically sync and measure
the VMSA as the final stage of guest measurement, requiring the IGVM file to
provide the VMSA as the final directive for the measurement to match. Also, the
kernel hardcodes the VMSA GPA, again requiring the IGVM file to match. If we
have the ability to provide the VMSA directly (including the GPA of the VMSA)
then these restrictions are removed.

I'd suggest that for SEV and SEV-ES, the current method of syncing certain
fields (and updating the QEMU documentation to describe this) is sufficient for
now. And perhaps this is ok for SEV-SNP too, but we should pursue the ability to
provide the full VMSA at least in the SEV-SNP case.

> 
> With regards,
> Daniel

Kind regards,
Roy

Patch

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 173de91afe..d6902432fd 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -74,9 +74,7 @@  struct SevGuestState {
     SevState state;
     gchar *measurement;
 
-    uint32_t reset_cs;
-    uint32_t reset_ip;
-    bool reset_data_valid;
+    QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa;
 };
 
 #define DEFAULT_GUEST_POLICY    0x1 /* disable debug */
@@ -99,6 +97,12 @@  typedef struct QEMU_PACKED SevHashTableDescriptor {
 /* hard code sha256 digest size */
 #define HASH_SIZE 32
 
+/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */
+#define FLAGS_VMSA_TO_SEGCACHE(flags) \
+    ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8))
+#define FLAGS_SEGCACHE_TO_VMSA(flags) \
+    ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12))
+
 typedef struct QEMU_PACKED SevHashTableEntry {
     QemuUUID guid;
     uint16_t len;
@@ -125,6 +129,15 @@  typedef struct QEMU_PACKED PaddedSevHashTable {
 QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0);
 
 static SevGuestState *sev_guest;
+
+typedef struct SevLaunchVmsa {
+    QTAILQ_ENTRY(SevLaunchVmsa) next;
+
+    uint16_t cpu_index;
+    uint64_t gpa;
+    struct sev_es_save_area vmsa;
+} SevLaunchVmsa;
+
 static Error *sev_mig_blocker;
 
 static const char *const sev_fw_errlist[] = {
@@ -291,6 +304,149 @@  sev_guest_finalize(Object *obj)
 {
 }
 
+static void sev_apply_cpu_context(CPUState *cpu)
+{
+    SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
+    X86CPU *x86;
+    CPUX86State *env;
+    struct SevLaunchVmsa *launch_vmsa;
+
+    /* See if an initial VMSA has been provided for this CPU */
+    QTAILQ_FOREACH(launch_vmsa, &sev_guest->launch_vmsa, next)
+    {
+        if (cpu->cpu_index == launch_vmsa->cpu_index) {
+            x86 = X86_CPU(cpu);
+            env = &x86->env;
+
+            /*
+             * Ideally we would provide the VMSA directly to kvm which would
+             * ensure that the resulting initial VMSA measurement which is
+             * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from
+             * exactly what we provide here. Currently this is not possible so
+             * we need to copy the parts of the VMSA structure that we currently
+             * support into the CPU state.
+             */
+            cpu_load_efer(env, launch_vmsa->vmsa.efer);
+            cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4);
+            cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0);
+            cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3);
+
+            cpu_x86_load_seg_cache(
+                env, R_CS, launch_vmsa->vmsa.cs.selector,
+                launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_DS, launch_vmsa->vmsa.ds.selector,
+                launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_ES, launch_vmsa->vmsa.es.selector,
+                launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_FS, launch_vmsa->vmsa.fs.selector,
+                launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_GS, launch_vmsa->vmsa.gs.selector,
+                launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib));
+            cpu_x86_load_seg_cache(
+                env, R_SS, launch_vmsa->vmsa.ss.selector,
+                launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit,
+                FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib));
+
+            env->gdt.base = launch_vmsa->vmsa.gdtr.base;
+            env->gdt.limit = launch_vmsa->vmsa.gdtr.limit;
+            env->idt.base = launch_vmsa->vmsa.idtr.base;
+            env->idt.limit = launch_vmsa->vmsa.idtr.limit;
+
+            env->regs[R_EAX] = launch_vmsa->vmsa.rax;
+            env->regs[R_ECX] = launch_vmsa->vmsa.rcx;
+            env->regs[R_EDX] = launch_vmsa->vmsa.rdx;
+            env->regs[R_EBX] = launch_vmsa->vmsa.rbx;
+            env->regs[R_ESP] = launch_vmsa->vmsa.rsp;
+            env->regs[R_EBP] = launch_vmsa->vmsa.rbp;
+            env->regs[R_ESI] = launch_vmsa->vmsa.rsi;
+            env->regs[R_EDI] = launch_vmsa->vmsa.rdi;
+#ifdef TARGET_X86_64
+            env->regs[R_R8] = launch_vmsa->vmsa.r8;
+            env->regs[R_R9] = launch_vmsa->vmsa.r9;
+            env->regs[R_R10] = launch_vmsa->vmsa.r10;
+            env->regs[R_R11] = launch_vmsa->vmsa.r11;
+            env->regs[R_R12] = launch_vmsa->vmsa.r12;
+            env->regs[R_R13] = launch_vmsa->vmsa.r13;
+            env->regs[R_R14] = launch_vmsa->vmsa.r14;
+            env->regs[R_R15] = launch_vmsa->vmsa.r15;
+#endif
+            env->eip = launch_vmsa->vmsa.rip;
+            break;
+        }
+    }
+}
+
+static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx,
+                               uint32_t ctx_len, hwaddr gpa)
+{
+    SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
+    SevLaunchVmsa *launch_vmsa;
+    CPUState *cpu;
+    bool exists = false;
+
+    /*
+     * Setting the CPU context is only supported for SEV-ES. The context buffer
+     * will contain a sev_es_save_area from the Linux kernel which is defined by
+     * "Table B-4. VMSA Layout, State Save Area for SEV-ES" in the AMD64 APM,
+     * Volume 2.
+     */
+
+    if (!sev_es_enabled()) {
+        error_report("SEV: unable to set CPU context: Not supported");
+        return 1;
+    }
+
+    if (ctx_len < sizeof(struct sev_es_save_area)) {
+        error_report("SEV: unable to set CPU context: "
+                     "Invalid context provided");
+        return 1;
+    }
+
+    cpu = qemu_get_cpu(cpu_index);
+    if (!cpu) {
+        error_report("SEV: unable to set CPU context for out of bounds "
+                     "CPU index %d", cpu_index);
+        return 1;
+    }
+
+    /*
+     * If the context of this VP has already been set then replace it with the
+     * new context.
+     */
+    QTAILQ_FOREACH(launch_vmsa, &sev_guest->launch_vmsa, next)
+    {
+        if (cpu_index == launch_vmsa->cpu_index) {
+            launch_vmsa->gpa = gpa;
+            memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa));
+            exists = true;
+            break;
+        }
+    }
+
+    if (!exists) {
+        /* New VP context */
+        launch_vmsa = g_new0(SevLaunchVmsa, 1);
+        memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa));
+        launch_vmsa->cpu_index = cpu_index;
+        launch_vmsa->gpa = gpa;
+        QTAILQ_INSERT_TAIL(&sev_guest->launch_vmsa, launch_vmsa, next);
+    }
+
+    /* Synchronise the VMSA with the current CPU state */
+    sev_apply_cpu_context(cpu);
+
+    return 0;
+}
+
 static char *
 sev_guest_get_session_file(Object *obj, Error **errp)
 {
@@ -394,6 +550,7 @@  sev_guest_instance_init(Object *obj)
     object_property_add_uint32_ptr(obj, "reduced-phys-bits",
                                    &sev->reduced_phys_bits,
                                    OBJ_PROP_FLAG_READWRITE);
+    QTAILQ_INIT(&sev->launch_vmsa);
 }
 
 /* sev guest info */
@@ -784,6 +941,16 @@  static int
 sev_launch_update_vmsa(SevGuestState *sev)
 {
     int ret, fw_error;
+    CPUState *cpu;
+
+    /*
+     * The initial CPU state is measured as part of KVM_SEV_LAUNCH_UPDATE_VMSA.
+     * Synchronise the CPU state to any provided launch VMSA structures.
+     */
+    CPU_FOREACH(cpu) {
+        sev_apply_cpu_context(cpu);
+    }
+
 
     ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error);
     if (ret) {
@@ -1196,34 +1363,99 @@  sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size,
     return sev_es_parse_reset_block(info, addr);
 }
 
-void sev_es_set_reset_vector(CPUState *cpu)
-{
-    X86CPU *x86;
-    CPUX86State *env;
 
-    /* Only update if we have valid reset information */
-    if (!sev_guest || !sev_guest->reset_data_valid) {
-        return;
-    }
+static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg *vmsa_seg)
+{
+    vmsa_seg->selector = cpu_seg->selector;
+    vmsa_seg->base = cpu_seg->base;
+    vmsa_seg->limit = cpu_seg->limit;
+    vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags);
+}
 
-    /* Do not update the BSP reset state */
-    if (cpu->cpu_index == 0) {
-        return;
-    }
+static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area *vmsa)
+{
+    const X86CPU *x86 = X86_CPU(cpu);
+    const CPUX86State *env = &x86->env;
 
-    x86 = X86_CPU(cpu);
-    env = &x86->env;
+    /*
+     * Initialize the SEV-ES save area from the current state of
+     * the CPU. The entire state does not need to be copied, only the state
+     * that is copied back to the CPUState in sev_apply_cpu_context.
+     */
+    memset(vmsa, 0, sizeof(struct sev_es_save_area));
+    vmsa->efer = env->efer;
+    vmsa->cr0 = env->cr[0];
+    vmsa->cr3 = env->cr[3];
+    vmsa->cr4 = env->cr[4];
+
+    seg_to_vmsa(&env->segs[R_CS], &vmsa->cs);
+    seg_to_vmsa(&env->segs[R_DS], &vmsa->ds);
+    seg_to_vmsa(&env->segs[R_ES], &vmsa->es);
+    seg_to_vmsa(&env->segs[R_FS], &vmsa->fs);
+    seg_to_vmsa(&env->segs[R_GS], &vmsa->gs);
+    seg_to_vmsa(&env->segs[R_SS], &vmsa->ss);
+
+    seg_to_vmsa(&env->gdt, &vmsa->gdtr);
+    seg_to_vmsa(&env->idt, &vmsa->idtr);
+
+    vmsa->rax = env->regs[R_EAX];
+    vmsa->rcx = env->regs[R_ECX];
+    vmsa->rdx = env->regs[R_EDX];
+    vmsa->rbx = env->regs[R_EBX];
+    vmsa->rsp = env->regs[R_ESP];
+    vmsa->rbp = env->regs[R_EBP];
+    vmsa->rsi = env->regs[R_ESI];
+    vmsa->rdi = env->regs[R_EDI];
+
+#ifdef TARGET_X86_64
+    vmsa->r8 = env->regs[R_R8];
+    vmsa->r9 = env->regs[R_R9];
+    vmsa->r10 = env->regs[R_R10];
+    vmsa->r11 = env->regs[R_R11];
+    vmsa->r12 = env->regs[R_R12];
+    vmsa->r13 = env->regs[R_R13];
+    vmsa->r14 = env->regs[R_R14];
+    vmsa->r15 = env->regs[R_R15];
+#endif
+
+    vmsa->rip = env->eip;
+}
 
-    cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff,
-                           DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
-                           DESC_R_MASK | DESC_A_MASK);
+static void sev_es_set_ap_context(uint32_t reset_addr)
+{
+    CPUState *cpu;
+    struct sev_es_save_area vmsa;
+    SegmentCache cs;
+
+    cs.selector = 0xf000;
+    cs.base = reset_addr & 0xffff0000;
+    cs.limit = 0xffff;
+    cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK |
+               DESC_A_MASK;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->cpu_index == 0) {
+            /* Do not update the BSP reset state */
+            continue;
+        }
+        initialize_vmsa(cpu, &vmsa);
+        seg_to_vmsa(&cs, &vmsa.cs);
+        vmsa.rip = reset_addr & 0x0000ffff;
+        sev_set_cpu_context(cpu->cpu_index, &vmsa,
+                            sizeof(struct sev_es_save_area), 0);
+        sev_apply_cpu_context(cpu);
+    }
+}
 
-    env->eip = sev_guest->reset_ip;
+void sev_es_set_reset_vector(CPUState *cpu)
+{
+    if (sev_enabled()) {
+        sev_apply_cpu_context(cpu);
+    }
 }
 
 int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
 {
-    CPUState *cpu;
     uint32_t addr;
     int ret;
 
@@ -1238,14 +1470,12 @@  int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
         return ret;
     }
 
+    /*
+     * The reset vector is saved into a CPU context for each AP but not for
+     * the BSP. This is applied during guest startup or when the CPU is reset.
+     */
     if (addr) {
-        sev_guest->reset_cs = addr & 0xffff0000;
-        sev_guest->reset_ip = addr & 0x0000ffff;
-        sev_guest->reset_data_valid = true;
-
-        CPU_FOREACH(cpu) {
-            sev_es_set_reset_vector(cpu);
-        }
+        sev_es_set_ap_context(addr);
     }
 
     return 0;
diff --git a/target/i386/sev.h b/target/i386/sev.h
index e7499c95b1..1fd896d896 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -38,6 +38,116 @@  typedef struct SevKernelLoaderContext {
     size_t cmdline_size;
 } SevKernelLoaderContext;
 
+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct QEMU_PACKED sev_es_save_area {
+    struct vmcb_seg es;
+    struct vmcb_seg cs;
+    struct vmcb_seg ss;
+    struct vmcb_seg ds;
+    struct vmcb_seg fs;
+    struct vmcb_seg gs;
+    struct vmcb_seg gdtr;
+    struct vmcb_seg ldtr;
+    struct vmcb_seg idtr;
+    struct vmcb_seg tr;
+    uint64_t vmpl0_ssp;
+    uint64_t vmpl1_ssp;
+    uint64_t vmpl2_ssp;
+    uint64_t vmpl3_ssp;
+    uint64_t u_cet;
+    uint8_t reserved_0xc8[2];
+    uint8_t vmpl;
+    uint8_t cpl;
+    uint8_t reserved_0xcc[4];
+    uint64_t efer;
+    uint8_t reserved_0xd8[104];
+    uint64_t xss;
+    uint64_t cr4;
+    uint64_t cr3;
+    uint64_t cr0;
+    uint64_t dr7;
+    uint64_t dr6;
+    uint64_t rflags;
+    uint64_t rip;
+    uint64_t dr0;
+    uint64_t dr1;
+    uint64_t dr2;
+    uint64_t dr3;
+    uint64_t dr0_addr_mask;
+    uint64_t dr1_addr_mask;
+    uint64_t dr2_addr_mask;
+    uint64_t dr3_addr_mask;
+    uint8_t reserved_0x1c0[24];
+    uint64_t rsp;
+    uint64_t s_cet;
+    uint64_t ssp;
+    uint64_t isst_addr;
+    uint64_t rax;
+    uint64_t star;
+    uint64_t lstar;
+    uint64_t cstar;
+    uint64_t sfmask;
+    uint64_t kernel_gs_base;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t cr2;
+    uint8_t reserved_0x248[32];
+    uint64_t g_pat;
+    uint64_t dbgctl;
+    uint64_t br_from;
+    uint64_t br_to;
+    uint64_t last_excp_from;
+    uint64_t last_excp_to;
+    uint8_t reserved_0x298[80];
+    uint32_t pkru;
+    uint32_t tsc_aux;
+    uint8_t reserved_0x2f0[24];
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t reserved_0x320; /* rsp already available at 0x01d8 */
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint8_t reserved_0x380[16];
+    uint64_t guest_exit_info_1;
+    uint64_t guest_exit_info_2;
+    uint64_t guest_exit_int_info;
+    uint64_t guest_nrip;
+    uint64_t sev_features;
+    uint64_t vintr_ctrl;
+    uint64_t guest_exit_code;
+    uint64_t virtual_tom;
+    uint64_t tlb_id;
+    uint64_t pcpu_id;
+    uint64_t event_inj;
+    uint64_t xcr0;
+    uint8_t reserved_0x3f0[16];
+
+    /* Floating point area */
+    uint64_t x87_dp;
+    uint32_t mxcsr;
+    uint16_t x87_ftw;
+    uint16_t x87_fsw;
+    uint16_t x87_fcw;
+    uint16_t x87_fop;
+    uint16_t x87_ds;
+    uint16_t x87_cs;
+    uint64_t x87_rip;
+    uint8_t fpreg_x87[80];
+    uint8_t fpreg_xmm[256];
+    uint8_t fpreg_ymm[256];
+};
+
 #ifdef CONFIG_SEV
 bool sev_enabled(void);
 bool sev_es_enabled(void);