diff mbox series

[v3] KVM, SEV: Add KVM_EXIT_SHUTDOWN metadata for SEV-ES

Message ID 20220330182821.2633150-1-pgonda@google.com (mailing list archive)
State New, archived
Headers show
Series [v3] KVM, SEV: Add KVM_EXIT_SHUTDOWN metadata for SEV-ES | expand

Commit Message

Peter Gonda March 30, 2022, 6:28 p.m. UTC
SEV-ES guests can request termination using the GHCB's MSR protocol. See
AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
struct the userspace VMM can clearly see the guest has requested a SEV-ES
termination including the termination reason code set and reason code.

Signed-off-by: Peter Gonda <pgonda@google.com>

---
V3
 * Add Documentation/ update.
 * Updated other KVM_EXIT_SHUTDOWN exits to clear ndata and set reason
   to KVM_SHUTDOWN_REQ.

V2
 * Add KVM_CAP_EXIT_SHUTDOWN_REASON check for KVM_CHECK_EXTENSION.

Tested by making an SEV-ES guest call sev_es_terminate() with hardcoded
reason code set and reason code and then observing the codes from the
userspace VMM in the kvm_run.shutdown.data fields.

Change-Id: I55dcdf0f42bfd70d0e59829ae70c2fb067b60809
---
 Documentation/virt/kvm/api.rst | 12 ++++++++++++
 arch/x86/kvm/svm/sev.c         |  9 +++++++--
 arch/x86/kvm/svm/svm.c         |  2 ++
 arch/x86/kvm/vmx/vmx.c         |  2 ++
 arch/x86/kvm/x86.c             |  2 ++
 include/uapi/linux/kvm.h       | 13 +++++++++++++
 virt/kvm/kvm_main.c            |  1 +
 7 files changed, 39 insertions(+), 2 deletions(-)

Comments

Marc Orr March 31, 2022, 4:20 a.m. UTC | #1
On Wed, Mar 30, 2022 at 11:44 AM Peter Gonda <pgonda@google.com> wrote:
>
> SEV-ES guests can request termination using the GHCB's MSR protocol. See
> AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> struct the userspace VMM can clearly see the guest has requested a SEV-ES
> termination including the termination reason code set and reason code.
>
> Signed-off-by: Peter Gonda <pgonda@google.com>
>
> ---
> V3
>  * Add Documentation/ update.
>  * Updated other KVM_EXIT_SHUTDOWN exits to clear ndata and set reason
>    to KVM_SHUTDOWN_REQ.
>
> V2
>  * Add KVM_CAP_EXIT_SHUTDOWN_REASON check for KVM_CHECK_EXTENSION.
>
> Tested by making an SEV-ES guest call sev_es_terminate() with hardcoded
> reason code set and reason code and then observing the codes from the
> userspace VMM in the kvm_run.shutdown.data fields.
>
> Change-Id: I55dcdf0f42bfd70d0e59829ae70c2fb067b60809
> ---
>  Documentation/virt/kvm/api.rst | 12 ++++++++++++
>  arch/x86/kvm/svm/sev.c         |  9 +++++++--
>  arch/x86/kvm/svm/svm.c         |  2 ++
>  arch/x86/kvm/vmx/vmx.c         |  2 ++
>  arch/x86/kvm/x86.c             |  2 ++
>  include/uapi/linux/kvm.h       | 13 +++++++++++++
>  virt/kvm/kvm_main.c            |  1 +
>  7 files changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 2aebb89576d1..d53a66a3760e 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -7834,3 +7834,15 @@ only be invoked on a VM prior to the creation of VCPUs.
>  At this time, KVM_PMU_CAP_DISABLE is the only capability.  Setting
>  this capability will disable PMU virtualization for that VM.  Usermode
>  should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
> +
> +8.36 KVM_CAP_EXIT_SHUTDOWN_REASON
> +---------------------------
> +
> +:Capability KVM_CAP_EXIT_SHUTDOWN_REASON
> +:Architectures: x86
> +:Type: vm
> +
> +This capability means shutdown metadata may be included in
> +kvm_run.shutdown when a vCPU exits with KVM_EXIT_SHUTDOWN. This
> +may help userspace determine the guest's reason for termination and
> +if the guest should be restarted or an error caused the shutdown.
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 75fa6dd268f0..5f9d37dd3f6f 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
>                 pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
>                         reason_set, reason_code);
>
> -               ret = -EINVAL;
> -               break;
> +               vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> +               vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM;
> +               vcpu->run->shutdown.ndata = 2;
> +               vcpu->run->shutdown.data[0] = reason_set;
> +               vcpu->run->shutdown.data[1] = reason_code;
> +
> +               return 0;
>         }
>         default:
>                 /* Error, keep GHCB MSR value as-is */
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 6535adee3e9c..c2cc10776517 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1953,6 +1953,8 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
>         kvm_vcpu_reset(vcpu, true);
>
>         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
> +       vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
> +       vcpu->run->shutdown.ndata = 0;
>         return 0;
>  }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 84a7500cd80c..85b21fc490e4 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4988,6 +4988,8 @@ static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
>  static int handle_triple_fault(struct kvm_vcpu *vcpu)
>  {
>         vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> +       vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
> +       vcpu->run->shutdown.ndata = 0;
>         vcpu->mmio_needed = 0;
>         return 0;
>  }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index d3a9ce07a565..f7cd224a4c32 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9999,6 +9999,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                                 kvm_x86_ops.nested_ops->triple_fault(vcpu);
>                         } else {
>                                 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> +                               vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
> +                               vcpu->run->shutdown.ndata = 0;
>                                 vcpu->mmio_needed = 0;
>                                 r = 0;
>                                 goto out;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 8616af85dc5d..017c03421c48 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -271,6 +271,12 @@ struct kvm_xen_exit {
>  #define KVM_EXIT_XEN              34
>  #define KVM_EXIT_RISCV_SBI        35
>
> +/* For KVM_EXIT_SHUTDOWN */
> +/* Standard VM shutdown request. No additional metadata provided. */
> +#define KVM_SHUTDOWN_REQ       0
> +/* SEV-ES termination request */
> +#define KVM_SHUTDOWN_SEV_TERM  1
> +
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
>  #define KVM_INTERNAL_ERROR_EMULATION   1
> @@ -311,6 +317,12 @@ struct kvm_run {
>                 struct {
>                         __u64 hardware_exit_reason;
>                 } hw;
> +               /* KVM_EXIT_SHUTDOWN */
> +               struct {
> +                       __u64 reason;
> +                       __u32 ndata;
> +                       __u64 data[16];
> +               } shutdown;
>                 /* KVM_EXIT_FAIL_ENTRY */
>                 struct {
>                         __u64 hardware_entry_failure_reason;
> @@ -1145,6 +1157,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_PMU_CAPABILITY 212
>  #define KVM_CAP_DISABLE_QUIRKS2 213
>  #define KVM_CAP_VM_TSC_CONTROL 214
> +#define KVM_CAP_EXIT_SHUTDOWN_REASON 215
>
>  #ifdef KVM_CAP_IRQ_ROUTING
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 70e05af5ebea..03b6e472f32c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4299,6 +4299,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
>         case KVM_CAP_CHECK_EXTENSION_VM:
>         case KVM_CAP_ENABLE_CAP_VM:
>         case KVM_CAP_HALT_POLL:
> +       case KVM_CAP_EXIT_SHUTDOWN_REASON:
>                 return 1;
>  #ifdef CONFIG_KVM_MMIO
>         case KVM_CAP_COALESCED_MMIO:
> --
> 2.35.1.1094.g7c7d902a7c-goog
>

Reviewed-by: Marc Orr <marcorr@google.com>
Sean Christopherson March 31, 2022, 5:11 p.m. UTC | #2
+Paolo and Vitaly

In the future, I highly recommend using scripts/get_maintainers.pl.

On Wed, Mar 30, 2022, Peter Gonda wrote:
> SEV-ES guests can request termination using the GHCB's MSR protocol. See
> AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> struct the userspace VMM can clearly see the guest has requested a SEV-ES
> termination including the termination reason code set and reason code.
> 
> Signed-off-by: Peter Gonda <pgonda@google.com>
> 
> ---
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 75fa6dd268f0..5f9d37dd3f6f 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
>  		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
>  			reason_set, reason_code);

This pr_info() should be removed.  A malicious usersepace could spam the kernel
by constantly running a vCPU that requests termination.

> -		ret = -EINVAL;
> -		break;
> +		vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> +		vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM;
> +		vcpu->run->shutdown.ndata = 2;
> +		vcpu->run->shutdown.data[0] = reason_set;
> +		vcpu->run->shutdown.data[1] = reason_code;

Does KVM really need to split the reason_set and reason_code?  Without reading
the spec, it's not even clear what "set" means.  Assuming it's something like
"the reason code is valid", then I don't see any reason (lol) to split the two.
If we do split them, then arguably the reason_code should be filled if and only
if reason_set is true, and that's just extra work.

Dumping the entire raw "MSR" would simplify this code and the helper to fill the
termination request.

Though I'm not actually sure KVM_EXIT_SHUTDOWN is the best exit reason.  The
exit reason used for Hyper-V's paravirt stuff is arguably more appropriate,
because in this case the guest hasn't actually encountered shutdown, rather it
has requested termination.

		/* KVM_EXIT_SYSTEM_EVENT */
		struct {
#define KVM_SYSTEM_EVENT_SHUTDOWN       1
#define KVM_SYSTEM_EVENT_RESET          2
#define KVM_SYSTEM_EVENT_CRASH          3
			__u32 type;
			__u64 flags;
		} system_event;

Though looking at system_event, isn't that missing padding after type?  Ah, KVM
doesn't actually populate flags, wonderful.  Maybe we can get away with tweaking
it to:

		struct {
			__u32 type;
			__u32 ndata;
			__u64 data[16];
		} system_event;

> +
> +		return 0;
>  	}
>  	default:
>  		/* Error, keep GHCB MSR value as-is */
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 6535adee3e9c..c2cc10776517 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1953,6 +1953,8 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
>  	kvm_vcpu_reset(vcpu, true);
>  
>  	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
> +	vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
> +	vcpu->run->shutdown.ndata = 0;
>  	return 0;
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 84a7500cd80c..85b21fc490e4 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4988,6 +4988,8 @@ static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
>  static int handle_triple_fault(struct kvm_vcpu *vcpu)
>  {
>  	vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> +	vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
> +	vcpu->run->shutdown.ndata = 0;

If we do piggyback KVM_EXIT_SHUTDOWN, a helper to fill the shutdown reason is
warranted, similar to prepare_emulation_failure_exit().  If the raw GHCB MSR is
dumped, the helper can be:

  void kvm_prepare_shutdown_exit(struct kvm_vcpu *vcpu, u64 *data);

where a NULL @data means ndata=0, and a non-NULL @data means ndata=1.

>  	vcpu->mmio_needed = 0;
>  	return 0;
>  }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index d3a9ce07a565..f7cd224a4c32 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9999,6 +9999,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  				kvm_x86_ops.nested_ops->triple_fault(vcpu);
>  			} else {
>  				vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> +				vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
> +				vcpu->run->shutdown.ndata = 0;
>  				vcpu->mmio_needed = 0;
>  				r = 0;
>  				goto out;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 8616af85dc5d..017c03421c48 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -271,6 +271,12 @@ struct kvm_xen_exit {
>  #define KVM_EXIT_XEN              34
>  #define KVM_EXIT_RISCV_SBI        35
>  
> +/* For KVM_EXIT_SHUTDOWN */
> +/* Standard VM shutdown request. No additional metadata provided. */
> +#define KVM_SHUTDOWN_REQ	0

I dislike the "REQ" part.  In a triple fault scenario, the guest isn't requesting
anything.  KVM does "request" shutdown, but that's not a request from the guest's
perspective and is purely a KVM implementation detail.  I'm having trouble coming
up with a decent alternative, but that's probably another indicator that piggybacking
KVM_EXIT_SHUTDOWN may not be appropriate.

> +/* SEV-ES termination request */
> +#define KVM_SHUTDOWN_SEV_TERM	1
> +
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
>  #define KVM_INTERNAL_ERROR_EMULATION	1
> @@ -311,6 +317,12 @@ struct kvm_run {
>  		struct {
>  			__u64 hardware_exit_reason;
>  		} hw;
> +		/* KVM_EXIT_SHUTDOWN */
> +		struct {
> +			__u64 reason;
> +			__u32 ndata;

This needs 

			__u32 pad;

to ensure data[16] is aligned regardless of compiler.  Though it's simpler to
just do:

		struct {
			__u32 reason;
			__u32 ndata;
			__u64 data[16];
		}

because 4 billion reasons is probably enough :-)

> +			__u64 data[16];
> +		} shutdown;
>  		/* KVM_EXIT_FAIL_ENTRY */
>  		struct {
>  			__u64 hardware_entry_failure_reason;
> @@ -1145,6 +1157,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_PMU_CAPABILITY 212
>  #define KVM_CAP_DISABLE_QUIRKS2 213
>  #define KVM_CAP_VM_TSC_CONTROL 214
> +#define KVM_CAP_EXIT_SHUTDOWN_REASON 215
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 70e05af5ebea..03b6e472f32c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4299,6 +4299,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
>  	case KVM_CAP_CHECK_EXTENSION_VM:
>  	case KVM_CAP_ENABLE_CAP_VM:
>  	case KVM_CAP_HALT_POLL:
> +	case KVM_CAP_EXIT_SHUTDOWN_REASON:
>  		return 1;
>  #ifdef CONFIG_KVM_MMIO
>  	case KVM_CAP_COALESCED_MMIO:
> -- 
> 2.35.1.1094.g7c7d902a7c-goog
>
Paolo Bonzini March 31, 2022, 5:27 p.m. UTC | #3
On 3/31/22 19:11, Sean Christopherson wrote:
> 		/* KVM_EXIT_SYSTEM_EVENT */
> 		struct {
> #define KVM_SYSTEM_EVENT_SHUTDOWN       1
> #define KVM_SYSTEM_EVENT_RESET          2
> #define KVM_SYSTEM_EVENT_CRASH          3
> 			__u32 type;
> 			__u64 flags;
> 		} system_event;
> 
> Though looking at system_event, isn't that missing padding after type?  Ah, KVM
> doesn't actually populate flags, wonderful.  Maybe we can get away with tweaking
> it to:
> 
> 		struct {
> 			__u32 type;
> 			__u32 ndata;
> 			__u64 data[16];
> 		} system_event;

Yes, you can do that and say that the ndata is only valid e.g. if bit 31 
is set in type.

I agree with reusing KVM_EXIT_SYSTEM_EVENT, that would be a much smaller 
patch.  Sorry about that Peter.

Paolo
Marc Orr March 31, 2022, 5:40 p.m. UTC | #4
On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote:
>
> +Paolo and Vitaly
>
> In the future, I highly recommend using scripts/get_maintainers.pl.
>
> On Wed, Mar 30, 2022, Peter Gonda wrote:
> > SEV-ES guests can request termination using the GHCB's MSR protocol. See
> > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> > struct the userspace VMM can clearly see the guest has requested a SEV-ES
> > termination including the termination reason code set and reason code.
> >
> > Signed-off-by: Peter Gonda <pgonda@google.com>
> >
> > ---
> > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > index 75fa6dd268f0..5f9d37dd3f6f 100644
> > --- a/arch/x86/kvm/svm/sev.c
> > +++ b/arch/x86/kvm/svm/sev.c
> > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> >               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
> >                       reason_set, reason_code);
>
> This pr_info() should be removed.  A malicious usersepace could spam the kernel
> by constantly running a vCPU that requests termination.

Ah, good catch. But actually, I've found this specific pr_info _very_
useful in debugging. Sean, would you be OK to convert it to a rate
limited print?

> > -             ret = -EINVAL;
> > -             break;
> > +             vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> > +             vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM;
> > +             vcpu->run->shutdown.ndata = 2;
> > +             vcpu->run->shutdown.data[0] = reason_set;
> > +             vcpu->run->shutdown.data[1] = reason_code;
>
> Does KVM really need to split the reason_set and reason_code?  Without reading
> the spec, it's not even clear what "set" means.  Assuming it's something like
> "the reason code is valid", then I don't see any reason (lol) to split the two.
> If we do split them, then arguably the reason_code should be filled if and only
> if reason_set is true, and that's just extra work.

I think KVM should split the reason_set and reason_code. This code is
based on the GHCB spec after all. And reason_set and reason_code are
both a part of the GHCB spec. But I agree, folks shouldn't have to go
to the spec to understand what reason_set and reason_code are. Rather
than not splitting them up, can we add comments in the source to
explain what they mean?

Also, my understanding from reading the spec is that reason_code
should always be filled, even when reason_set is 0. See below. But
basically, you can have reason_set: 0 and reason_code: non-zero.

Quoting the spec:
The reason code set is meant to provide hypervisors with their own
termination SEV-ES Guest-Hypervisor Communication Block
Standardization reason codes. This document defines and owns reason
code set 0x0 and the following reason codes (GHCBData[23:16]):
0x00 – General termination request
0x01 – SEV-ES / GHCB Protocol range is not supported.
0x02 – SEV-SNP features not supported
Marc Orr March 31, 2022, 5:47 p.m. UTC | #5
On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote:
>
> On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote:
> >
> > +Paolo and Vitaly
> >
> > In the future, I highly recommend using scripts/get_maintainers.pl.
> >
> > On Wed, Mar 30, 2022, Peter Gonda wrote:
> > > SEV-ES guests can request termination using the GHCB's MSR protocol. See
> > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> > > struct the userspace VMM can clearly see the guest has requested a SEV-ES
> > > termination including the termination reason code set and reason code.
> > >
> > > Signed-off-by: Peter Gonda <pgonda@google.com>
> > >
> > > ---
> > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > > index 75fa6dd268f0..5f9d37dd3f6f 100644
> > > --- a/arch/x86/kvm/svm/sev.c
> > > +++ b/arch/x86/kvm/svm/sev.c
> > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> > >               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
> > >                       reason_set, reason_code);
> >
> > This pr_info() should be removed.  A malicious usersepace could spam the kernel
> > by constantly running a vCPU that requests termination.

Though... this patch makes this pr_info redundant! Since we'll now
report this in userspace. Actually, I'd be OK to remove it.

> Ah, good catch. But actually, I've found this specific pr_info _very_
> useful in debugging. Sean, would you be OK to convert it to a rate
> limited print?
>
> > > -             ret = -EINVAL;
> > > -             break;
> > > +             vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> > > +             vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM;
> > > +             vcpu->run->shutdown.ndata = 2;
> > > +             vcpu->run->shutdown.data[0] = reason_set;
> > > +             vcpu->run->shutdown.data[1] = reason_code;
> >
> > Does KVM really need to split the reason_set and reason_code?  Without reading
> > the spec, it's not even clear what "set" means.  Assuming it's something like
> > "the reason code is valid", then I don't see any reason (lol) to split the two.
> > If we do split them, then arguably the reason_code should be filled if and only
> > if reason_set is true, and that's just extra work.
>
> I think KVM should split the reason_set and reason_code. This code is
> based on the GHCB spec after all. And reason_set and reason_code are
> both a part of the GHCB spec. But I agree, folks shouldn't have to go
> to the spec to understand what reason_set and reason_code are. Rather
> than not splitting them up, can we add comments in the source to
> explain what they mean?
>
> Also, my understanding from reading the spec is that reason_code
> should always be filled, even when reason_set is 0. See below. But
> basically, you can have reason_set: 0 and reason_code: non-zero.
>
> Quoting the spec:
> The reason code set is meant to provide hypervisors with their own
> termination SEV-ES Guest-Hypervisor Communication Block
> Standardization reason codes. This document defines and owns reason
> code set 0x0 and the following reason codes (GHCBData[23:16]):
> 0x00 – General termination request
> 0x01 – SEV-ES / GHCB Protocol range is not supported.
> 0x02 – SEV-SNP features not supported

Reading this again, I now see that "reason_set" sounds like "The
reason code is set". I bet that's how Sean read it during his review.
So yeah, this needs comments :-)!
Peter Gonda March 31, 2022, 6:43 p.m. UTC | #6
On Thu, Mar 31, 2022 at 11:48 AM Marc Orr <marcorr@google.com> wrote:
>
> On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote:
> >
> > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > +Paolo and Vitaly
> > >
> > > In the future, I highly recommend using scripts/get_maintainers.pl.
> > >
> > > On Wed, Mar 30, 2022, Peter Gonda wrote:
> > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See
> > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES
> > > > termination including the termination reason code set and reason code.
> > > >
> > > > Signed-off-by: Peter Gonda <pgonda@google.com>
> > > >
> > > > ---
> > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > > > index 75fa6dd268f0..5f9d37dd3f6f 100644
> > > > --- a/arch/x86/kvm/svm/sev.c
> > > > +++ b/arch/x86/kvm/svm/sev.c
> > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> > > >               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
> > > >                       reason_set, reason_code);
> > >
> > > This pr_info() should be removed.  A malicious usersepace could spam the kernel
> > > by constantly running a vCPU that requests termination.
>
> Though... this patch makes this pr_info redundant! Since we'll now
> report this in userspace. Actually, I'd be OK to remove it.

I'll make this 2 patches. This current patch and another to rate limit
this pr_info() I think this patch is doing a lot already so would
prefer to just add a second. Is that reasonable?

>
> > Ah, good catch. But actually, I've found this specific pr_info _very_
> > useful in debugging. Sean, would you be OK to convert it to a rate
> > limited print?
> >
> > > > -             ret = -EINVAL;
> > > > -             break;
> > > > +             vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
> > > > +             vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM;
> > > > +             vcpu->run->shutdown.ndata = 2;
> > > > +             vcpu->run->shutdown.data[0] = reason_set;
> > > > +             vcpu->run->shutdown.data[1] = reason_code;
> > >
> > > Does KVM really need to split the reason_set and reason_code?  Without reading
> > > the spec, it's not even clear what "set" means.  Assuming it's something like
> > > "the reason code is valid", then I don't see any reason (lol) to split the two.
> > > If we do split them, then arguably the reason_code should be filled if and only
> > > if reason_set is true, and that's just extra work.
> >
> > I think KVM should split the reason_set and reason_code. This code is
> > based on the GHCB spec after all. And reason_set and reason_code are
> > both a part of the GHCB spec. But I agree, folks shouldn't have to go
> > to the spec to understand what reason_set and reason_code are. Rather
> > than not splitting them up, can we add comments in the source to
> > explain what they mean?
> >
> > Also, my understanding from reading the spec is that reason_code
> > should always be filled, even when reason_set is 0. See below. But
> > basically, you can have reason_set: 0 and reason_code: non-zero.
> >
> > Quoting the spec:
> > The reason code set is meant to provide hypervisors with their own
> > termination SEV-ES Guest-Hypervisor Communication Block
> > Standardization reason codes. This document defines and owns reason
> > code set 0x0 and the following reason codes (GHCBData[23:16]):
> > 0x00 – General termination request
> > 0x01 – SEV-ES / GHCB Protocol range is not supported.
> > 0x02 – SEV-SNP features not supported
>
> Reading this again, I now see that "reason_set" sounds like "The
> reason code is set". I bet that's how Sean read it during his review.
> So yeah, this needs comments :-)!

I'll add comments but I agree with Marc. These are part of the GHCB
spec so for the very specific SEV-ES termination reason we should
include all the data the spec allows. Sounds OK?
Peter Gonda March 31, 2022, 6:47 p.m. UTC | #7
On Thu, Mar 31, 2022 at 11:27 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 3/31/22 19:11, Sean Christopherson wrote:
> >               /* KVM_EXIT_SYSTEM_EVENT */
> >               struct {
> > #define KVM_SYSTEM_EVENT_SHUTDOWN       1
> > #define KVM_SYSTEM_EVENT_RESET          2
> > #define KVM_SYSTEM_EVENT_CRASH          3
> >                       __u32 type;
> >                       __u64 flags;
> >               } system_event;
> >
> > Though looking at system_event, isn't that missing padding after type?  Ah, KVM
> > doesn't actually populate flags, wonderful.  Maybe we can get away with tweaking
> > it to:
> >
> >               struct {
> >                       __u32 type;
> >                       __u32 ndata;
> >                       __u64 data[16];
> >               } system_event;
>
> Yes, you can do that and say that the ndata is only valid e.g. if bit 31
> is set in type.
>
> I agree with reusing KVM_EXIT_SYSTEM_EVENT, that would be a much smaller
> patch.  Sorry about that Peter.

KVM_EXIT_SYSTEM_EVENT makes sense. No worries, I appreciate the very
detailed feedback. I'll update this for a V4 and have a second patch
to address the pr_info()

>
> Paolo
>
Sean Christopherson March 31, 2022, 6:54 p.m. UTC | #8
On Thu, Mar 31, 2022, Peter Gonda wrote:
> On Thu, Mar 31, 2022 at 11:48 AM Marc Orr <marcorr@google.com> wrote:
> >
> > On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote:
> > >
> > > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote:
> > > >
> > > > +Paolo and Vitaly
> > > >
> > > > In the future, I highly recommend using scripts/get_maintainers.pl.
> > > >
> > > > On Wed, Mar 30, 2022, Peter Gonda wrote:
> > > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See
> > > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> > > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> > > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> > > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES
> > > > > termination including the termination reason code set and reason code.
> > > > >
> > > > > Signed-off-by: Peter Gonda <pgonda@google.com>
> > > > >
> > > > > ---
> > > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > > > > index 75fa6dd268f0..5f9d37dd3f6f 100644
> > > > > --- a/arch/x86/kvm/svm/sev.c
> > > > > +++ b/arch/x86/kvm/svm/sev.c
> > > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> > > > >               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
> > > > >                       reason_set, reason_code);
> > > >
> > > > This pr_info() should be removed.  A malicious usersepace could spam the kernel
> > > > by constantly running a vCPU that requests termination.
> >
> > Though... this patch makes this pr_info redundant! Since we'll now
> > report this in userspace. Actually, I'd be OK to remove it.
> 
> I'll make this 2 patches. This current patch and another to rate limit
> this pr_info() I think this patch is doing a lot already so would
> prefer to just add a second. Is that reasonable?

I strongly prefer removing the pr_info() entirely.  As Marc pointed out, the
info is redundant when KVM properly reports the issue.  And worse, the info is
useless unless there's exactly one VM running.  Even then, it doesn't capture
which vCPU failed.  This is exactly why Jim, myself, and others, have been pushing
to avoid using dmesg to report guest errors.  They're helpful for initial
development, but dead weight for production, and if they're helpful for development
then odds are good that having proper reporting in production would also be valuable.

> > > Quoting the spec:
> > > The reason code set is meant to provide hypervisors with their own
> > > termination SEV-ES Guest-Hypervisor Communication Block
> > > Standardization reason codes. This document defines and owns reason
> > > code set 0x0 and the following reason codes (GHCBData[23:16]):
> > > 0x00 – General termination request
> > > 0x01 – SEV-ES / GHCB Protocol range is not supported.
> > > 0x02 – SEV-SNP features not supported
> >
> > Reading this again, I now see that "reason_set" sounds like "The
> > reason code is set". I bet that's how Sean read it during his review.
> > So yeah, this needs comments :-)!
> 
> I'll add comments but I agree with Marc. These are part of the GHCB
> spec so for the very specific SEV-ES termination reason we should
> include all the data the spec allows. Sounds OK?

Ugh, so "set" means "set of reason codes"?  That's heinous naming.  I don't have a
strong objection to splitting, but at the same time, why not punt it to userspace?
Userspace is obviously going to have to understand what the hell "set" means
anyways...
Peter Gonda March 31, 2022, 6:59 p.m. UTC | #9
On Thu, Mar 31, 2022 at 12:54 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Thu, Mar 31, 2022, Peter Gonda wrote:
> > On Thu, Mar 31, 2022 at 11:48 AM Marc Orr <marcorr@google.com> wrote:
> > >
> > > On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote:
> > > >
> > > > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote:
> > > > >
> > > > > +Paolo and Vitaly
> > > > >
> > > > > In the future, I highly recommend using scripts/get_maintainers.pl.
> > > > >
> > > > > On Wed, Mar 30, 2022, Peter Gonda wrote:
> > > > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See
> > > > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a
> > > > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL)
> > > > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run
> > > > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES
> > > > > > termination including the termination reason code set and reason code.
> > > > > >
> > > > > > Signed-off-by: Peter Gonda <pgonda@google.com>
> > > > > >
> > > > > > ---
> > > > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > > > > > index 75fa6dd268f0..5f9d37dd3f6f 100644
> > > > > > --- a/arch/x86/kvm/svm/sev.c
> > > > > > +++ b/arch/x86/kvm/svm/sev.c
> > > > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
> > > > > >               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
> > > > > >                       reason_set, reason_code);
> > > > >
> > > > > This pr_info() should be removed.  A malicious usersepace could spam the kernel
> > > > > by constantly running a vCPU that requests termination.
> > >
> > > Though... this patch makes this pr_info redundant! Since we'll now
> > > report this in userspace. Actually, I'd be OK to remove it.
> >
> > I'll make this 2 patches. This current patch and another to rate limit
> > this pr_info() I think this patch is doing a lot already so would
> > prefer to just add a second. Is that reasonable?
>
> I strongly prefer removing the pr_info() entirely.  As Marc pointed out, the
> info is redundant when KVM properly reports the issue.  And worse, the info is
> useless unless there's exactly one VM running.  Even then, it doesn't capture
> which vCPU failed.  This is exactly why Jim, myself, and others, have been pushing
> to avoid using dmesg to report guest errors.  They're helpful for initial
> development, but dead weight for production, and if they're helpful for development
> then odds are good that having proper reporting in production would also be valuable.

Sounds good to me. Is a second patch OK with you? I think we get a lot
of cryptic cpu run exit reasons so fixing this up when we remove
pr_infos would be good. This would be a good example without this
pr_info or this change you'd have no idea whats going on.

>
> > > > Quoting the spec:
> > > > The reason code set is meant to provide hypervisors with their own
> > > > termination SEV-ES Guest-Hypervisor Communication Block
> > > > Standardization reason codes. This document defines and owns reason
> > > > code set 0x0 and the following reason codes (GHCBData[23:16]):
> > > > 0x00 – General termination request
> > > > 0x01 – SEV-ES / GHCB Protocol range is not supported.
> > > > 0x02 – SEV-SNP features not supported
> > >
> > > Reading this again, I now see that "reason_set" sounds like "The
> > > reason code is set". I bet that's how Sean read it during his review.
> > > So yeah, this needs comments :-)!
> >
> > I'll add comments but I agree with Marc. These are part of the GHCB
> > spec so for the very specific SEV-ES termination reason we should
> > include all the data the spec allows. Sounds OK?
>
> Ugh, so "set" means "set of reason codes"?  That's heinous naming.  I don't have a
> strong objection to splitting, but at the same time, why not punt it to userspace?
> Userspace is obviously going to have to understand what the hell "set" means
> anyways...

I am fine just copying the entire MSR to userspace.
Sean Christopherson March 31, 2022, 7:02 p.m. UTC | #10
On Thu, Mar 31, 2022, Peter Gonda wrote:
> On Thu, Mar 31, 2022 at 12:54 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Thu, Mar 31, 2022, Peter Gonda wrote:
> > > I'll make this 2 patches. This current patch and another to rate limit
> > > this pr_info() I think this patch is doing a lot already so would
> > > prefer to just add a second. Is that reasonable?
> >
> > I strongly prefer removing the pr_info() entirely.  As Marc pointed out, the
> > info is redundant when KVM properly reports the issue.  And worse, the info is
> > useless unless there's exactly one VM running.  Even then, it doesn't capture
> > which vCPU failed.  This is exactly why Jim, myself, and others, have been pushing
> > to avoid using dmesg to report guest errors.  They're helpful for initial
> > development, but dead weight for production, and if they're helpful for development
> > then odds are good that having proper reporting in production would also be valuable.
> 
> Sounds good to me. Is a second patch OK with you? I think we get a lot
> of cryptic cpu run exit reasons so fixing this up when we remove
> pr_infos would be good. This would be a good example without this
> pr_info or this change you'd have no idea whats going on.

As in, a second patch to remove the pr_info?  Yeah, no objection.
Marc Orr March 31, 2022, 7:02 p.m. UTC | #11
> > > > > The reason code set is meant to provide hypervisors with their own
> > > > > termination SEV-ES Guest-Hypervisor Communication Block
> > > > > Standardization reason codes. This document defines and owns reason
> > > > > code set 0x0 and the following reason codes (GHCBData[23:16]):
> > > > > 0x00 – General termination request
> > > > > 0x01 – SEV-ES / GHCB Protocol range is not supported.
> > > > > 0x02 – SEV-SNP features not supported
> > > >
> > > > Reading this again, I now see that "reason_set" sounds like "The
> > > > reason code is set". I bet that's how Sean read it during his review.
> > > > So yeah, this needs comments :-)!
> > >
> > > I'll add comments but I agree with Marc. These are part of the GHCB
> > > spec so for the very specific SEV-ES termination reason we should
> > > include all the data the spec allows. Sounds OK?
> >
> > Ugh, so "set" means "set of reason codes"?  That's heinous naming.  I don't have a
> > strong objection to splitting, but at the same time, why not punt it to userspace?
> > Userspace is obviously going to have to understand what the hell "set" means
> > anyways...
>
> I am fine just copying the entire MSR to userspace.

I'm fine with it too. Thanks for the feedback, Sean!
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 2aebb89576d1..d53a66a3760e 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7834,3 +7834,15 @@  only be invoked on a VM prior to the creation of VCPUs.
 At this time, KVM_PMU_CAP_DISABLE is the only capability.  Setting
 this capability will disable PMU virtualization for that VM.  Usermode
 should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
+
+8.36 KVM_CAP_EXIT_SHUTDOWN_REASON
+---------------------------
+
+:Capability KVM_CAP_EXIT_SHUTDOWN_REASON
+:Architectures: x86
+:Type: vm
+
+This capability means shutdown metadata may be included in
+kvm_run.shutdown when a vCPU exits with KVM_EXIT_SHUTDOWN. This
+may help userspace determine the guest's reason for termination and
+if the guest should be restarted or an error caused the shutdown.
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 75fa6dd268f0..5f9d37dd3f6f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2735,8 +2735,13 @@  static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
 			reason_set, reason_code);
 
-		ret = -EINVAL;
-		break;
+		vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+		vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM;
+		vcpu->run->shutdown.ndata = 2;
+		vcpu->run->shutdown.data[0] = reason_set;
+		vcpu->run->shutdown.data[1] = reason_code;
+
+		return 0;
 	}
 	default:
 		/* Error, keep GHCB MSR value as-is */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6535adee3e9c..c2cc10776517 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1953,6 +1953,8 @@  static int shutdown_interception(struct kvm_vcpu *vcpu)
 	kvm_vcpu_reset(vcpu, true);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+	vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
+	vcpu->run->shutdown.ndata = 0;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 84a7500cd80c..85b21fc490e4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4988,6 +4988,8 @@  static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
 static int handle_triple_fault(struct kvm_vcpu *vcpu)
 {
 	vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+	vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
+	vcpu->run->shutdown.ndata = 0;
 	vcpu->mmio_needed = 0;
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d3a9ce07a565..f7cd224a4c32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9999,6 +9999,8 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 				kvm_x86_ops.nested_ops->triple_fault(vcpu);
 			} else {
 				vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+				vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ;
+				vcpu->run->shutdown.ndata = 0;
 				vcpu->mmio_needed = 0;
 				r = 0;
 				goto out;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8616af85dc5d..017c03421c48 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -271,6 +271,12 @@  struct kvm_xen_exit {
 #define KVM_EXIT_XEN              34
 #define KVM_EXIT_RISCV_SBI        35
 
+/* For KVM_EXIT_SHUTDOWN */
+/* Standard VM shutdown request. No additional metadata provided. */
+#define KVM_SHUTDOWN_REQ	0
+/* SEV-ES termination request */
+#define KVM_SHUTDOWN_SEV_TERM	1
+
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
 #define KVM_INTERNAL_ERROR_EMULATION	1
@@ -311,6 +317,12 @@  struct kvm_run {
 		struct {
 			__u64 hardware_exit_reason;
 		} hw;
+		/* KVM_EXIT_SHUTDOWN */
+		struct {
+			__u64 reason;
+			__u32 ndata;
+			__u64 data[16];
+		} shutdown;
 		/* KVM_EXIT_FAIL_ENTRY */
 		struct {
 			__u64 hardware_entry_failure_reason;
@@ -1145,6 +1157,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
 #define KVM_CAP_VM_TSC_CONTROL 214
+#define KVM_CAP_EXIT_SHUTDOWN_REASON 215
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70e05af5ebea..03b6e472f32c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4299,6 +4299,7 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_CHECK_EXTENSION_VM:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_HALT_POLL:
+	case KVM_CAP_EXIT_SHUTDOWN_REASON:
 		return 1;
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO: