Message ID | 20220330182821.2633150-1-pgonda@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] KVM, SEV: Add KVM_EXIT_SHUTDOWN metadata for SEV-ES | expand |
On Wed, Mar 30, 2022 at 11:44 AM Peter Gonda <pgonda@google.com> wrote: > > SEV-ES guests can request termination using the GHCB's MSR protocol. See > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > struct the userspace VMM can clearly see the guest has requested a SEV-ES > termination including the termination reason code set and reason code. > > Signed-off-by: Peter Gonda <pgonda@google.com> > > --- > V3 > * Add Documentation/ update. > * Updated other KVM_EXIT_SHUTDOWN exits to clear ndata and set reason > to KVM_SHUTDOWN_REQ. > > V2 > * Add KVM_CAP_EXIT_SHUTDOWN_REASON check for KVM_CHECK_EXTENSION. > > Tested by making an SEV-ES guest call sev_es_terminate() with hardcoded > reason code set and reason code and then observing the codes from the > userspace VMM in the kvm_run.shutdown.data fields. > > Change-Id: I55dcdf0f42bfd70d0e59829ae70c2fb067b60809 > --- > Documentation/virt/kvm/api.rst | 12 ++++++++++++ > arch/x86/kvm/svm/sev.c | 9 +++++++-- > arch/x86/kvm/svm/svm.c | 2 ++ > arch/x86/kvm/vmx/vmx.c | 2 ++ > arch/x86/kvm/x86.c | 2 ++ > include/uapi/linux/kvm.h | 13 +++++++++++++ > virt/kvm/kvm_main.c | 1 + > 7 files changed, 39 insertions(+), 2 deletions(-) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index 2aebb89576d1..d53a66a3760e 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -7834,3 +7834,15 @@ only be invoked on a VM prior to the creation of VCPUs. > At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting > this capability will disable PMU virtualization for that VM. Usermode > should adjust CPUID leaf 0xA to reflect that the PMU is disabled. > + > +8.36 KVM_CAP_EXIT_SHUTDOWN_REASON > +--------------------------- > + > +:Capability KVM_CAP_EXIT_SHUTDOWN_REASON > +:Architectures: x86 > +:Type: vm > + > +This capability means shutdown metadata may be included in > +kvm_run.shutdown when a vCPU exits with KVM_EXIT_SHUTDOWN. This > +may help userspace determine the guest's reason for termination and > +if the guest should be restarted or an error caused the shutdown. > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 75fa6dd268f0..5f9d37dd3f6f 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > reason_set, reason_code); > > - ret = -EINVAL; > - break; > + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM; > + vcpu->run->shutdown.ndata = 2; > + vcpu->run->shutdown.data[0] = reason_set; > + vcpu->run->shutdown.data[1] = reason_code; > + > + return 0; > } > default: > /* Error, keep GHCB MSR value as-is */ > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 6535adee3e9c..c2cc10776517 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -1953,6 +1953,8 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) > kvm_vcpu_reset(vcpu, true); > > kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; > + vcpu->run->shutdown.ndata = 0; > return 0; > } > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 84a7500cd80c..85b21fc490e4 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -4988,6 +4988,8 @@ static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) > static int handle_triple_fault(struct kvm_vcpu *vcpu) > { > vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; > + vcpu->run->shutdown.ndata = 0; > vcpu->mmio_needed = 0; > return 0; > } > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index d3a9ce07a565..f7cd224a4c32 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -9999,6 +9999,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > kvm_x86_ops.nested_ops->triple_fault(vcpu); > } else { > vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; > + vcpu->run->shutdown.ndata = 0; > vcpu->mmio_needed = 0; > r = 0; > goto out; > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 8616af85dc5d..017c03421c48 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -271,6 +271,12 @@ struct kvm_xen_exit { > #define KVM_EXIT_XEN 34 > #define KVM_EXIT_RISCV_SBI 35 > > +/* For KVM_EXIT_SHUTDOWN */ > +/* Standard VM shutdown request. No additional metadata provided. */ > +#define KVM_SHUTDOWN_REQ 0 > +/* SEV-ES termination request */ > +#define KVM_SHUTDOWN_SEV_TERM 1 > + > /* For KVM_EXIT_INTERNAL_ERROR */ > /* Emulate instruction failed. */ > #define KVM_INTERNAL_ERROR_EMULATION 1 > @@ -311,6 +317,12 @@ struct kvm_run { > struct { > __u64 hardware_exit_reason; > } hw; > + /* KVM_EXIT_SHUTDOWN */ > + struct { > + __u64 reason; > + __u32 ndata; > + __u64 data[16]; > + } shutdown; > /* KVM_EXIT_FAIL_ENTRY */ > struct { > __u64 hardware_entry_failure_reason; > @@ -1145,6 +1157,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_PMU_CAPABILITY 212 > #define KVM_CAP_DISABLE_QUIRKS2 213 > #define KVM_CAP_VM_TSC_CONTROL 214 > +#define KVM_CAP_EXIT_SHUTDOWN_REASON 215 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 70e05af5ebea..03b6e472f32c 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -4299,6 +4299,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) > case KVM_CAP_CHECK_EXTENSION_VM: > case KVM_CAP_ENABLE_CAP_VM: > case KVM_CAP_HALT_POLL: > + case KVM_CAP_EXIT_SHUTDOWN_REASON: > return 1; > #ifdef CONFIG_KVM_MMIO > case KVM_CAP_COALESCED_MMIO: > -- > 2.35.1.1094.g7c7d902a7c-goog > Reviewed-by: Marc Orr <marcorr@google.com>
+Paolo and Vitaly In the future, I highly recommend using scripts/get_maintainers.pl. On Wed, Mar 30, 2022, Peter Gonda wrote: > SEV-ES guests can request termination using the GHCB's MSR protocol. See > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > struct the userspace VMM can clearly see the guest has requested a SEV-ES > termination including the termination reason code set and reason code. > > Signed-off-by: Peter Gonda <pgonda@google.com> > > --- > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 75fa6dd268f0..5f9d37dd3f6f 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > reason_set, reason_code); This pr_info() should be removed. A malicious usersepace could spam the kernel by constantly running a vCPU that requests termination. > - ret = -EINVAL; > - break; > + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM; > + vcpu->run->shutdown.ndata = 2; > + vcpu->run->shutdown.data[0] = reason_set; > + vcpu->run->shutdown.data[1] = reason_code; Does KVM really need to split the reason_set and reason_code? Without reading the spec, it's not even clear what "set" means. Assuming it's something like "the reason code is valid", then I don't see any reason (lol) to split the two. If we do split them, then arguably the reason_code should be filled if and only if reason_set is true, and that's just extra work. Dumping the entire raw "MSR" would simplify this code and the helper to fill the termination request. Though I'm not actually sure KVM_EXIT_SHUTDOWN is the best exit reason. The exit reason used for Hyper-V's paravirt stuff is arguably more appropriate, because in this case the guest hasn't actually encountered shutdown, rather it has requested termination. /* KVM_EXIT_SYSTEM_EVENT */ struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; __u64 flags; } system_event; Though looking at system_event, isn't that missing padding after type? Ah, KVM doesn't actually populate flags, wonderful. Maybe we can get away with tweaking it to: struct { __u32 type; __u32 ndata; __u64 data[16]; } system_event; > + > + return 0; > } > default: > /* Error, keep GHCB MSR value as-is */ > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 6535adee3e9c..c2cc10776517 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -1953,6 +1953,8 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) > kvm_vcpu_reset(vcpu, true); > > kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; > + vcpu->run->shutdown.ndata = 0; > return 0; > } > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 84a7500cd80c..85b21fc490e4 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -4988,6 +4988,8 @@ static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) > static int handle_triple_fault(struct kvm_vcpu *vcpu) > { > vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; > + vcpu->run->shutdown.ndata = 0; If we do piggyback KVM_EXIT_SHUTDOWN, a helper to fill the shutdown reason is warranted, similar to prepare_emulation_failure_exit(). If the raw GHCB MSR is dumped, the helper can be: void kvm_prepare_shutdown_exit(struct kvm_vcpu *vcpu, u64 *data); where a NULL @data means ndata=0, and a non-NULL @data means ndata=1. > vcpu->mmio_needed = 0; > return 0; > } > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index d3a9ce07a565..f7cd224a4c32 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -9999,6 +9999,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > kvm_x86_ops.nested_ops->triple_fault(vcpu); > } else { > vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; > + vcpu->run->shutdown.ndata = 0; > vcpu->mmio_needed = 0; > r = 0; > goto out; > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 8616af85dc5d..017c03421c48 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -271,6 +271,12 @@ struct kvm_xen_exit { > #define KVM_EXIT_XEN 34 > #define KVM_EXIT_RISCV_SBI 35 > > +/* For KVM_EXIT_SHUTDOWN */ > +/* Standard VM shutdown request. No additional metadata provided. */ > +#define KVM_SHUTDOWN_REQ 0 I dislike the "REQ" part. In a triple fault scenario, the guest isn't requesting anything. KVM does "request" shutdown, but that's not a request from the guest's perspective and is purely a KVM implementation detail. I'm having trouble coming up with a decent alternative, but that's probably another indicator that piggybacking KVM_EXIT_SHUTDOWN may not be appropriate. > +/* SEV-ES termination request */ > +#define KVM_SHUTDOWN_SEV_TERM 1 > + > /* For KVM_EXIT_INTERNAL_ERROR */ > /* Emulate instruction failed. */ > #define KVM_INTERNAL_ERROR_EMULATION 1 > @@ -311,6 +317,12 @@ struct kvm_run { > struct { > __u64 hardware_exit_reason; > } hw; > + /* KVM_EXIT_SHUTDOWN */ > + struct { > + __u64 reason; > + __u32 ndata; This needs __u32 pad; to ensure data[16] is aligned regardless of compiler. Though it's simpler to just do: struct { __u32 reason; __u32 ndata; __u64 data[16]; } because 4 billion reasons is probably enough :-) > + __u64 data[16]; > + } shutdown; > /* KVM_EXIT_FAIL_ENTRY */ > struct { > __u64 hardware_entry_failure_reason; > @@ -1145,6 +1157,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_PMU_CAPABILITY 212 > #define KVM_CAP_DISABLE_QUIRKS2 213 > #define KVM_CAP_VM_TSC_CONTROL 214 > +#define KVM_CAP_EXIT_SHUTDOWN_REASON 215 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 70e05af5ebea..03b6e472f32c 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -4299,6 +4299,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) > case KVM_CAP_CHECK_EXTENSION_VM: > case KVM_CAP_ENABLE_CAP_VM: > case KVM_CAP_HALT_POLL: > + case KVM_CAP_EXIT_SHUTDOWN_REASON: > return 1; > #ifdef CONFIG_KVM_MMIO > case KVM_CAP_COALESCED_MMIO: > -- > 2.35.1.1094.g7c7d902a7c-goog >
On 3/31/22 19:11, Sean Christopherson wrote: > /* KVM_EXIT_SYSTEM_EVENT */ > struct { > #define KVM_SYSTEM_EVENT_SHUTDOWN 1 > #define KVM_SYSTEM_EVENT_RESET 2 > #define KVM_SYSTEM_EVENT_CRASH 3 > __u32 type; > __u64 flags; > } system_event; > > Though looking at system_event, isn't that missing padding after type? Ah, KVM > doesn't actually populate flags, wonderful. Maybe we can get away with tweaking > it to: > > struct { > __u32 type; > __u32 ndata; > __u64 data[16]; > } system_event; Yes, you can do that and say that the ndata is only valid e.g. if bit 31 is set in type. I agree with reusing KVM_EXIT_SYSTEM_EVENT, that would be a much smaller patch. Sorry about that Peter. Paolo
On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote: > > +Paolo and Vitaly > > In the future, I highly recommend using scripts/get_maintainers.pl. > > On Wed, Mar 30, 2022, Peter Gonda wrote: > > SEV-ES guests can request termination using the GHCB's MSR protocol. See > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > > struct the userspace VMM can clearly see the guest has requested a SEV-ES > > termination including the termination reason code set and reason code. > > > > Signed-off-by: Peter Gonda <pgonda@google.com> > > > > --- > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > > index 75fa6dd268f0..5f9d37dd3f6f 100644 > > --- a/arch/x86/kvm/svm/sev.c > > +++ b/arch/x86/kvm/svm/sev.c > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > > reason_set, reason_code); > > This pr_info() should be removed. A malicious usersepace could spam the kernel > by constantly running a vCPU that requests termination. Ah, good catch. But actually, I've found this specific pr_info _very_ useful in debugging. Sean, would you be OK to convert it to a rate limited print? > > - ret = -EINVAL; > > - break; > > + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM; > > + vcpu->run->shutdown.ndata = 2; > > + vcpu->run->shutdown.data[0] = reason_set; > > + vcpu->run->shutdown.data[1] = reason_code; > > Does KVM really need to split the reason_set and reason_code? Without reading > the spec, it's not even clear what "set" means. Assuming it's something like > "the reason code is valid", then I don't see any reason (lol) to split the two. > If we do split them, then arguably the reason_code should be filled if and only > if reason_set is true, and that's just extra work. I think KVM should split the reason_set and reason_code. This code is based on the GHCB spec after all. And reason_set and reason_code are both a part of the GHCB spec. But I agree, folks shouldn't have to go to the spec to understand what reason_set and reason_code are. Rather than not splitting them up, can we add comments in the source to explain what they mean? Also, my understanding from reading the spec is that reason_code should always be filled, even when reason_set is 0. See below. But basically, you can have reason_set: 0 and reason_code: non-zero. Quoting the spec: The reason code set is meant to provide hypervisors with their own termination SEV-ES Guest-Hypervisor Communication Block Standardization reason codes. This document defines and owns reason code set 0x0 and the following reason codes (GHCBData[23:16]): 0x00 – General termination request 0x01 – SEV-ES / GHCB Protocol range is not supported. 0x02 – SEV-SNP features not supported
On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote: > > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote: > > > > +Paolo and Vitaly > > > > In the future, I highly recommend using scripts/get_maintainers.pl. > > > > On Wed, Mar 30, 2022, Peter Gonda wrote: > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES > > > termination including the termination reason code set and reason code. > > > > > > Signed-off-by: Peter Gonda <pgonda@google.com> > > > > > > --- > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > > > index 75fa6dd268f0..5f9d37dd3f6f 100644 > > > --- a/arch/x86/kvm/svm/sev.c > > > +++ b/arch/x86/kvm/svm/sev.c > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > > > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > > > reason_set, reason_code); > > > > This pr_info() should be removed. A malicious usersepace could spam the kernel > > by constantly running a vCPU that requests termination. Though... this patch makes this pr_info redundant! Since we'll now report this in userspace. Actually, I'd be OK to remove it. > Ah, good catch. But actually, I've found this specific pr_info _very_ > useful in debugging. Sean, would you be OK to convert it to a rate > limited print? > > > > - ret = -EINVAL; > > > - break; > > > + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > > > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM; > > > + vcpu->run->shutdown.ndata = 2; > > > + vcpu->run->shutdown.data[0] = reason_set; > > > + vcpu->run->shutdown.data[1] = reason_code; > > > > Does KVM really need to split the reason_set and reason_code? Without reading > > the spec, it's not even clear what "set" means. Assuming it's something like > > "the reason code is valid", then I don't see any reason (lol) to split the two. > > If we do split them, then arguably the reason_code should be filled if and only > > if reason_set is true, and that's just extra work. > > I think KVM should split the reason_set and reason_code. This code is > based on the GHCB spec after all. And reason_set and reason_code are > both a part of the GHCB spec. But I agree, folks shouldn't have to go > to the spec to understand what reason_set and reason_code are. Rather > than not splitting them up, can we add comments in the source to > explain what they mean? > > Also, my understanding from reading the spec is that reason_code > should always be filled, even when reason_set is 0. See below. But > basically, you can have reason_set: 0 and reason_code: non-zero. > > Quoting the spec: > The reason code set is meant to provide hypervisors with their own > termination SEV-ES Guest-Hypervisor Communication Block > Standardization reason codes. This document defines and owns reason > code set 0x0 and the following reason codes (GHCBData[23:16]): > 0x00 – General termination request > 0x01 – SEV-ES / GHCB Protocol range is not supported. > 0x02 – SEV-SNP features not supported Reading this again, I now see that "reason_set" sounds like "The reason code is set". I bet that's how Sean read it during his review. So yeah, this needs comments :-)!
On Thu, Mar 31, 2022 at 11:48 AM Marc Orr <marcorr@google.com> wrote: > > On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote: > > > > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote: > > > > > > +Paolo and Vitaly > > > > > > In the future, I highly recommend using scripts/get_maintainers.pl. > > > > > > On Wed, Mar 30, 2022, Peter Gonda wrote: > > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See > > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES > > > > termination including the termination reason code set and reason code. > > > > > > > > Signed-off-by: Peter Gonda <pgonda@google.com> > > > > > > > > --- > > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > > > > index 75fa6dd268f0..5f9d37dd3f6f 100644 > > > > --- a/arch/x86/kvm/svm/sev.c > > > > +++ b/arch/x86/kvm/svm/sev.c > > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > > > > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > > > > reason_set, reason_code); > > > > > > This pr_info() should be removed. A malicious usersepace could spam the kernel > > > by constantly running a vCPU that requests termination. > > Though... this patch makes this pr_info redundant! Since we'll now > report this in userspace. Actually, I'd be OK to remove it. I'll make this 2 patches. This current patch and another to rate limit this pr_info() I think this patch is doing a lot already so would prefer to just add a second. Is that reasonable? > > > Ah, good catch. But actually, I've found this specific pr_info _very_ > > useful in debugging. Sean, would you be OK to convert it to a rate > > limited print? > > > > > > - ret = -EINVAL; > > > > - break; > > > > + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; > > > > + vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM; > > > > + vcpu->run->shutdown.ndata = 2; > > > > + vcpu->run->shutdown.data[0] = reason_set; > > > > + vcpu->run->shutdown.data[1] = reason_code; > > > > > > Does KVM really need to split the reason_set and reason_code? Without reading > > > the spec, it's not even clear what "set" means. Assuming it's something like > > > "the reason code is valid", then I don't see any reason (lol) to split the two. > > > If we do split them, then arguably the reason_code should be filled if and only > > > if reason_set is true, and that's just extra work. > > > > I think KVM should split the reason_set and reason_code. This code is > > based on the GHCB spec after all. And reason_set and reason_code are > > both a part of the GHCB spec. But I agree, folks shouldn't have to go > > to the spec to understand what reason_set and reason_code are. Rather > > than not splitting them up, can we add comments in the source to > > explain what they mean? > > > > Also, my understanding from reading the spec is that reason_code > > should always be filled, even when reason_set is 0. See below. But > > basically, you can have reason_set: 0 and reason_code: non-zero. > > > > Quoting the spec: > > The reason code set is meant to provide hypervisors with their own > > termination SEV-ES Guest-Hypervisor Communication Block > > Standardization reason codes. This document defines and owns reason > > code set 0x0 and the following reason codes (GHCBData[23:16]): > > 0x00 – General termination request > > 0x01 – SEV-ES / GHCB Protocol range is not supported. > > 0x02 – SEV-SNP features not supported > > Reading this again, I now see that "reason_set" sounds like "The > reason code is set". I bet that's how Sean read it during his review. > So yeah, this needs comments :-)! I'll add comments but I agree with Marc. These are part of the GHCB spec so for the very specific SEV-ES termination reason we should include all the data the spec allows. Sounds OK?
On Thu, Mar 31, 2022 at 11:27 AM Paolo Bonzini <pbonzini@redhat.com> wrote: > > On 3/31/22 19:11, Sean Christopherson wrote: > > /* KVM_EXIT_SYSTEM_EVENT */ > > struct { > > #define KVM_SYSTEM_EVENT_SHUTDOWN 1 > > #define KVM_SYSTEM_EVENT_RESET 2 > > #define KVM_SYSTEM_EVENT_CRASH 3 > > __u32 type; > > __u64 flags; > > } system_event; > > > > Though looking at system_event, isn't that missing padding after type? Ah, KVM > > doesn't actually populate flags, wonderful. Maybe we can get away with tweaking > > it to: > > > > struct { > > __u32 type; > > __u32 ndata; > > __u64 data[16]; > > } system_event; > > Yes, you can do that and say that the ndata is only valid e.g. if bit 31 > is set in type. > > I agree with reusing KVM_EXIT_SYSTEM_EVENT, that would be a much smaller > patch. Sorry about that Peter. KVM_EXIT_SYSTEM_EVENT makes sense. No worries, I appreciate the very detailed feedback. I'll update this for a V4 and have a second patch to address the pr_info() > > Paolo >
On Thu, Mar 31, 2022, Peter Gonda wrote: > On Thu, Mar 31, 2022 at 11:48 AM Marc Orr <marcorr@google.com> wrote: > > > > On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote: > > > > > > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote: > > > > > > > > +Paolo and Vitaly > > > > > > > > In the future, I highly recommend using scripts/get_maintainers.pl. > > > > > > > > On Wed, Mar 30, 2022, Peter Gonda wrote: > > > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See > > > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > > > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > > > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > > > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES > > > > > termination including the termination reason code set and reason code. > > > > > > > > > > Signed-off-by: Peter Gonda <pgonda@google.com> > > > > > > > > > > --- > > > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > > > > > index 75fa6dd268f0..5f9d37dd3f6f 100644 > > > > > --- a/arch/x86/kvm/svm/sev.c > > > > > +++ b/arch/x86/kvm/svm/sev.c > > > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > > > > > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > > > > > reason_set, reason_code); > > > > > > > > This pr_info() should be removed. A malicious usersepace could spam the kernel > > > > by constantly running a vCPU that requests termination. > > > > Though... this patch makes this pr_info redundant! Since we'll now > > report this in userspace. Actually, I'd be OK to remove it. > > I'll make this 2 patches. This current patch and another to rate limit > this pr_info() I think this patch is doing a lot already so would > prefer to just add a second. Is that reasonable? I strongly prefer removing the pr_info() entirely. As Marc pointed out, the info is redundant when KVM properly reports the issue. And worse, the info is useless unless there's exactly one VM running. Even then, it doesn't capture which vCPU failed. This is exactly why Jim, myself, and others, have been pushing to avoid using dmesg to report guest errors. They're helpful for initial development, but dead weight for production, and if they're helpful for development then odds are good that having proper reporting in production would also be valuable. > > > Quoting the spec: > > > The reason code set is meant to provide hypervisors with their own > > > termination SEV-ES Guest-Hypervisor Communication Block > > > Standardization reason codes. This document defines and owns reason > > > code set 0x0 and the following reason codes (GHCBData[23:16]): > > > 0x00 – General termination request > > > 0x01 – SEV-ES / GHCB Protocol range is not supported. > > > 0x02 – SEV-SNP features not supported > > > > Reading this again, I now see that "reason_set" sounds like "The > > reason code is set". I bet that's how Sean read it during his review. > > So yeah, this needs comments :-)! > > I'll add comments but I agree with Marc. These are part of the GHCB > spec so for the very specific SEV-ES termination reason we should > include all the data the spec allows. Sounds OK? Ugh, so "set" means "set of reason codes"? That's heinous naming. I don't have a strong objection to splitting, but at the same time, why not punt it to userspace? Userspace is obviously going to have to understand what the hell "set" means anyways...
On Thu, Mar 31, 2022 at 12:54 PM Sean Christopherson <seanjc@google.com> wrote: > > On Thu, Mar 31, 2022, Peter Gonda wrote: > > On Thu, Mar 31, 2022 at 11:48 AM Marc Orr <marcorr@google.com> wrote: > > > > > > On Thu, Mar 31, 2022 at 10:40 AM Marc Orr <marcorr@google.com> wrote: > > > > > > > > On Thu, Mar 31, 2022 at 10:11 AM Sean Christopherson <seanjc@google.com> wrote: > > > > > > > > > > +Paolo and Vitaly > > > > > > > > > > In the future, I highly recommend using scripts/get_maintainers.pl. > > > > > > > > > > On Wed, Mar 30, 2022, Peter Gonda wrote: > > > > > > SEV-ES guests can request termination using the GHCB's MSR protocol. See > > > > > > AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a > > > > > > guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) > > > > > > return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run > > > > > > struct the userspace VMM can clearly see the guest has requested a SEV-ES > > > > > > termination including the termination reason code set and reason code. > > > > > > > > > > > > Signed-off-by: Peter Gonda <pgonda@google.com> > > > > > > > > > > > > --- > > > > > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > > > > > > index 75fa6dd268f0..5f9d37dd3f6f 100644 > > > > > > --- a/arch/x86/kvm/svm/sev.c > > > > > > +++ b/arch/x86/kvm/svm/sev.c > > > > > > @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > > > > > > pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", > > > > > > reason_set, reason_code); > > > > > > > > > > This pr_info() should be removed. A malicious usersepace could spam the kernel > > > > > by constantly running a vCPU that requests termination. > > > > > > Though... this patch makes this pr_info redundant! Since we'll now > > > report this in userspace. Actually, I'd be OK to remove it. > > > > I'll make this 2 patches. This current patch and another to rate limit > > this pr_info() I think this patch is doing a lot already so would > > prefer to just add a second. Is that reasonable? > > I strongly prefer removing the pr_info() entirely. As Marc pointed out, the > info is redundant when KVM properly reports the issue. And worse, the info is > useless unless there's exactly one VM running. Even then, it doesn't capture > which vCPU failed. This is exactly why Jim, myself, and others, have been pushing > to avoid using dmesg to report guest errors. They're helpful for initial > development, but dead weight for production, and if they're helpful for development > then odds are good that having proper reporting in production would also be valuable. Sounds good to me. Is a second patch OK with you? I think we get a lot of cryptic cpu run exit reasons so fixing this up when we remove pr_infos would be good. This would be a good example without this pr_info or this change you'd have no idea whats going on. > > > > > Quoting the spec: > > > > The reason code set is meant to provide hypervisors with their own > > > > termination SEV-ES Guest-Hypervisor Communication Block > > > > Standardization reason codes. This document defines and owns reason > > > > code set 0x0 and the following reason codes (GHCBData[23:16]): > > > > 0x00 – General termination request > > > > 0x01 – SEV-ES / GHCB Protocol range is not supported. > > > > 0x02 – SEV-SNP features not supported > > > > > > Reading this again, I now see that "reason_set" sounds like "The > > > reason code is set". I bet that's how Sean read it during his review. > > > So yeah, this needs comments :-)! > > > > I'll add comments but I agree with Marc. These are part of the GHCB > > spec so for the very specific SEV-ES termination reason we should > > include all the data the spec allows. Sounds OK? > > Ugh, so "set" means "set of reason codes"? That's heinous naming. I don't have a > strong objection to splitting, but at the same time, why not punt it to userspace? > Userspace is obviously going to have to understand what the hell "set" means > anyways... I am fine just copying the entire MSR to userspace.
On Thu, Mar 31, 2022, Peter Gonda wrote: > On Thu, Mar 31, 2022 at 12:54 PM Sean Christopherson <seanjc@google.com> wrote: > > > > On Thu, Mar 31, 2022, Peter Gonda wrote: > > > I'll make this 2 patches. This current patch and another to rate limit > > > this pr_info() I think this patch is doing a lot already so would > > > prefer to just add a second. Is that reasonable? > > > > I strongly prefer removing the pr_info() entirely. As Marc pointed out, the > > info is redundant when KVM properly reports the issue. And worse, the info is > > useless unless there's exactly one VM running. Even then, it doesn't capture > > which vCPU failed. This is exactly why Jim, myself, and others, have been pushing > > to avoid using dmesg to report guest errors. They're helpful for initial > > development, but dead weight for production, and if they're helpful for development > > then odds are good that having proper reporting in production would also be valuable. > > Sounds good to me. Is a second patch OK with you? I think we get a lot > of cryptic cpu run exit reasons so fixing this up when we remove > pr_infos would be good. This would be a good example without this > pr_info or this change you'd have no idea whats going on. As in, a second patch to remove the pr_info? Yeah, no objection.
> > > > > The reason code set is meant to provide hypervisors with their own > > > > > termination SEV-ES Guest-Hypervisor Communication Block > > > > > Standardization reason codes. This document defines and owns reason > > > > > code set 0x0 and the following reason codes (GHCBData[23:16]): > > > > > 0x00 – General termination request > > > > > 0x01 – SEV-ES / GHCB Protocol range is not supported. > > > > > 0x02 – SEV-SNP features not supported > > > > > > > > Reading this again, I now see that "reason_set" sounds like "The > > > > reason code is set". I bet that's how Sean read it during his review. > > > > So yeah, this needs comments :-)! > > > > > > I'll add comments but I agree with Marc. These are part of the GHCB > > > spec so for the very specific SEV-ES termination reason we should > > > include all the data the spec allows. Sounds OK? > > > > Ugh, so "set" means "set of reason codes"? That's heinous naming. I don't have a > > strong objection to splitting, but at the same time, why not punt it to userspace? > > Userspace is obviously going to have to understand what the hell "set" means > > anyways... > > I am fine just copying the entire MSR to userspace. I'm fine with it too. Thanks for the feedback, Sean!
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 2aebb89576d1..d53a66a3760e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7834,3 +7834,15 @@ only be invoked on a VM prior to the creation of VCPUs. At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting this capability will disable PMU virtualization for that VM. Usermode should adjust CPUID leaf 0xA to reflect that the PMU is disabled. + +8.36 KVM_CAP_EXIT_SHUTDOWN_REASON +--------------------------- + +:Capability KVM_CAP_EXIT_SHUTDOWN_REASON +:Architectures: x86 +:Type: vm + +This capability means shutdown metadata may be included in +kvm_run.shutdown when a vCPU exits with KVM_EXIT_SHUTDOWN. This +may help userspace determine the guest's reason for termination and +if the guest should be restarted or an error caused the shutdown. diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 75fa6dd268f0..5f9d37dd3f6f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2735,8 +2735,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", reason_set, reason_code); - ret = -EINVAL; - break; + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->run->shutdown.reason = KVM_SHUTDOWN_SEV_TERM; + vcpu->run->shutdown.ndata = 2; + vcpu->run->shutdown.data[0] = reason_set; + vcpu->run->shutdown.data[1] = reason_code; + + return 0; } default: /* Error, keep GHCB MSR value as-is */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6535adee3e9c..c2cc10776517 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1953,6 +1953,8 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) kvm_vcpu_reset(vcpu, true); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; + vcpu->run->shutdown.ndata = 0; return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 84a7500cd80c..85b21fc490e4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4988,6 +4988,8 @@ static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) static int handle_triple_fault(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; + vcpu->run->shutdown.ndata = 0; vcpu->mmio_needed = 0; return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d3a9ce07a565..f7cd224a4c32 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9999,6 +9999,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops.nested_ops->triple_fault(vcpu); } else { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->run->shutdown.reason = KVM_SHUTDOWN_REQ; + vcpu->run->shutdown.ndata = 0; vcpu->mmio_needed = 0; r = 0; goto out; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8616af85dc5d..017c03421c48 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -271,6 +271,12 @@ struct kvm_xen_exit { #define KVM_EXIT_XEN 34 #define KVM_EXIT_RISCV_SBI 35 +/* For KVM_EXIT_SHUTDOWN */ +/* Standard VM shutdown request. No additional metadata provided. */ +#define KVM_SHUTDOWN_REQ 0 +/* SEV-ES termination request */ +#define KVM_SHUTDOWN_SEV_TERM 1 + /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -311,6 +317,12 @@ struct kvm_run { struct { __u64 hardware_exit_reason; } hw; + /* KVM_EXIT_SHUTDOWN */ + struct { + __u64 reason; + __u32 ndata; + __u64 data[16]; + } shutdown; /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; @@ -1145,6 +1157,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 #define KVM_CAP_VM_TSC_CONTROL 214 +#define KVM_CAP_EXIT_SHUTDOWN_REASON 215 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70e05af5ebea..03b6e472f32c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4299,6 +4299,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_HALT_POLL: + case KVM_CAP_EXIT_SHUTDOWN_REASON: return 1; #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO:
SEV-ES guests can request termination using the GHCB's MSR protocol. See AMD's GHCB spec section '4.1.13 Termination Request'. Currently when a guest does this the userspace VMM sees an KVM_EXIT_UNKNOWN (-EVINAL) return code from KVM_RUN. By adding a KVM_EXIT_SHUTDOWN_ENTRY to kvm_run struct the userspace VMM can clearly see the guest has requested a SEV-ES termination including the termination reason code set and reason code. Signed-off-by: Peter Gonda <pgonda@google.com> --- V3 * Add Documentation/ update. * Updated other KVM_EXIT_SHUTDOWN exits to clear ndata and set reason to KVM_SHUTDOWN_REQ. V2 * Add KVM_CAP_EXIT_SHUTDOWN_REASON check for KVM_CHECK_EXTENSION. Tested by making an SEV-ES guest call sev_es_terminate() with hardcoded reason code set and reason code and then observing the codes from the userspace VMM in the kvm_run.shutdown.data fields. Change-Id: I55dcdf0f42bfd70d0e59829ae70c2fb067b60809 --- Documentation/virt/kvm/api.rst | 12 ++++++++++++ arch/x86/kvm/svm/sev.c | 9 +++++++-- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 2 ++ include/uapi/linux/kvm.h | 13 +++++++++++++ virt/kvm/kvm_main.c | 1 + 7 files changed, 39 insertions(+), 2 deletions(-)