Message ID | 20181008182945.79957-1-jmattson@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/5] kvm: x86: Add payload to kvm_queued_exception and kvm_vcpu_events | expand |
> On 8 Oct 2018, at 21:29, Jim Mattson <jmattson@google.com> wrote: > > Under nested virtualization, the L1 hypervisor may intercept an > exception raised during the execution of L2 before the exception > is delivered. When the intercepted exception is #PF, the VM-exit > to the L1 hypervisor precedes the modification of CR2. When the > intercepted exception is #DB, the VM-exit to the L1 hypervisor > precedes the modifications of DR6 and DR7 under VMX, but the > VM-exit to the L1 hypervisor follows the modifications of DR6 and > DR7 under SVM. > > At present, CR2 is modified too early under both VMX and SVM. DR6 is > modified too early under VMX. DR7 is modified at the appropriate time. > Unfortunately, it is possible to exit to userspace with one of these > exceptions pending, and userspace may rely on the premature > side-effects. It is also possible for userspace to inject one of these > exceptions, in which case, userspace will presumably have already > processed the side-effects. > > To address this problem, a new per-VM capability > (KVM_CAP_EXCEPTION_PAYLOAD) will be introduced. When this capability > is enabled by userspace, the faulting linear address will be included > with the information about a pending #PF in L2, and the "new DR6 bits" > will be included with the information about a pending #DB in L2. This > ancillary exception information is carried in a new "payload" field. 
> > Reported-by: Jim Mattson <jmattson@google.com> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com> > Signed-off-by: Jim Mattson <jmattson@google.com> > Reviewed-by: Peter Shier <pshier@google.com> > --- > Documentation/virtual/kvm/api.txt | 13 +++++++++++-- > arch/x86/include/asm/kvm_host.h | 3 +++ > arch/x86/include/uapi/asm/kvm.h | 6 ++++-- > arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++--- > tools/arch/x86/include/uapi/asm/kvm.h | 6 ++++-- > 5 files changed, 47 insertions(+), 9 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 647f94128a85..2df2cca81cf5 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -850,7 +850,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -873,9 +873,11 @@ struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > + __u32 reserved[7]; > + __u64 exception_payload; > }; > > -Only two fields are defined in the flags field: > +The following bits are defined in the flags field: > > - KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that > interrupt.shadow contains a valid state. > @@ -883,6 +885,10 @@ Only two fields are defined in the flags field: > - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that > smi contains a valid state. > > +- KVM_VCPUEVENT_VALID_PAYLOAD may be set in the flags field to signal that > + exception.has_payload and payload contain valid state > + (i.e. KVM_CAP_EXCEPTION_PAYLOAD is enabled). > + > ARM/ARM64: > > If the guest accesses a device that is being emulated by the host kernel in > @@ -961,6 +967,9 @@ shall be written into the VCPU. > > KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. > > +KVM_VCPUEVENT_VALID_PAYLOAD can only be set if KVM_CAP_EXCEPTION_PAYLOAD > +is enabled. 
> + > ARM/ARM64: > > Set the pending SError exception state for this VCPU. It is not possible to > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 09b2e3e2cf1b..026229a593f2 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -585,6 +585,8 @@ struct kvm_vcpu_arch { > bool has_error_code; > u8 nr; > u32 error_code; > + unsigned long payload; > + bool has_payload; > u8 nested_apf; > } exception; > > @@ -871,6 +873,7 @@ struct kvm_arch { > bool x2apic_broadcast_quirk_disabled; > > bool guest_can_read_msr_platform_info; > + bool exception_payload_enabled; > }; > > struct kvm_vm_stat { > diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h > index fd23d5778ea1..e3ea52bdd461 100644 > --- a/arch/x86/include/uapi/asm/kvm.h > +++ b/arch/x86/include/uapi/asm/kvm.h > @@ -288,6 +288,7 @@ struct kvm_reinject_control { > #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 > #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 > #define KVM_VCPUEVENT_VALID_SMM 0x00000008 > +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 > > /* Interrupt shadow states */ > #define KVM_X86_SHADOW_INT_MOV_SS 0x01 > @@ -299,7 +300,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -322,7 +323,8 @@ struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > - __u32 reserved[9]; > + __u32 reserved[7]; > + __u64 exception_payload; > }; > > /* for KVM_GET/SET_DEBUGREGS */ > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index edbf00ec56b3..dbc538d66505 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -431,6 +431,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, > vcpu->arch.exception.has_error_code = has_error; > vcpu->arch.exception.nr = nr; > vcpu->arch.exception.error_code = error_code; > + vcpu->arch.exception.has_payload = false; > 
+ vcpu->arch.exception.payload = 0; > return; > } > > @@ -455,6 +457,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, > vcpu->arch.exception.has_error_code = true; > vcpu->arch.exception.nr = DF_VECTOR; > vcpu->arch.exception.error_code = 0; > + vcpu->arch.exception.has_payload = false; > + vcpu->arch.exception.payload = 0; > } else > /* replace previous exception with a new one in a hope > that instruction re-execution will regenerate lost > @@ -3373,8 +3377,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, > !kvm_exception_is_soft(vcpu->arch.exception.nr); > events->exception.nr = vcpu->arch.exception.nr; > events->exception.has_error_code = vcpu->arch.exception.has_error_code; > - events->exception.pad = 0; > events->exception.error_code = vcpu->arch.exception.error_code; > + events->exception.has_payload = vcpu->arch.exception.has_payload; > + events->exception_payload = vcpu->arch.exception.payload; > > events->interrupt.injected = > vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; > @@ -3398,6 +3403,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, > events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING > | KVM_VCPUEVENT_VALID_SHADOW > | KVM_VCPUEVENT_VALID_SMM); > + if (vcpu->kvm->arch.exception_payload_enabled) > + events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; > memset(&events->reserved, 0, sizeof(events->reserved)); > } > > @@ -3409,12 +3416,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, > if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING > | KVM_VCPUEVENT_VALID_SIPI_VECTOR > | KVM_VCPUEVENT_VALID_SHADOW > - | KVM_VCPUEVENT_VALID_SMM)) > + | KVM_VCPUEVENT_VALID_SMM > + | KVM_VCPUEVENT_VALID_PAYLOAD)) > + return -EINVAL; > + > + if ((events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) && > + !vcpu->kvm->arch.exception_payload_enabled) > return -EINVAL; > > if (events->exception.injected && > (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || > - 
is_guest_mode(vcpu))) > + (is_guest_mode(vcpu) && > + !vcpu->kvm->arch.exception_payload_enabled))) > return -EINVAL; > > /* INITs are latched while in SMM */ > @@ -3429,6 +3442,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, > vcpu->arch.exception.nr = events->exception.nr; > vcpu->arch.exception.has_error_code = events->exception.has_error_code; > vcpu->arch.exception.error_code = events->exception.error_code; > + if (vcpu->kvm->arch.exception_payload_enabled) { > + vcpu->arch.exception.has_payload = events->exception.has_payload; > + vcpu->arch.exception.payload = events->exception_payload; > + } else { > + vcpu->arch.exception.has_payload = 0; > + vcpu->arch.exception.payload = 0; > + } > > vcpu->arch.interrupt.injected = events->interrupt.injected; > vcpu->arch.interrupt.nr = events->interrupt.nr; > @@ -9463,6 +9483,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, > vcpu->arch.exception.nr = 0; > vcpu->arch.exception.has_error_code = false; > vcpu->arch.exception.error_code = 0; > + vcpu->arch.exception.has_payload = false; > + vcpu->arch.exception.payload = 0; > } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { > fault.vector = PF_VECTOR; > fault.error_code_valid = true; > diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h > index 86299efa804a..c4e4f03ce436 100644 > --- a/tools/arch/x86/include/uapi/asm/kvm.h > +++ b/tools/arch/x86/include/uapi/asm/kvm.h > @@ -288,6 +288,7 @@ struct kvm_reinject_control { > #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 > #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 > #define KVM_VCPUEVENT_VALID_SMM 0x00000008 > +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 > > /* Interrupt shadow states */ > #define KVM_X86_SHADOW_INT_MOV_SS 0x01 > @@ -299,7 +300,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -322,7 +323,8 @@ 
struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > - __u32 reserved[9]; > + __u32 reserved[7]; > + __u64 payload; > }; > > /* for KVM_GET/SET_DEBUGREGS */ > -- > 2.19.0.605.g01d371f741-goog > Reviewed-by: Liran Alon <liran.alon@oracle.com>
On 08/10/2018 20:29, Jim Mattson wrote: > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 647f94128a85..2df2cca81cf5 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -850,7 +850,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -873,9 +873,11 @@ struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > + __u32 reserved[7]; > + __u64 exception_payload; > }; This changes the size of the struct, and thus the ioctl number. The good part is that you don't need the capability, the bad part is that you need a struct kvm_vcpu_events2 and ioctl KVM_GET/SET_VCPU_EVENTS2. Thanks, Paolo
On Tue, Oct 9, 2018 at 9:08 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: > On 08/10/2018 20:29, Jim Mattson wrote: >> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt >> index 647f94128a85..2df2cca81cf5 100644 >> --- a/Documentation/virtual/kvm/api.txt >> +++ b/Documentation/virtual/kvm/api.txt >> @@ -850,7 +850,7 @@ struct kvm_vcpu_events { >> __u8 injected; >> __u8 nr; >> __u8 has_error_code; >> - __u8 pad; >> + __u8 has_payload; >> __u32 error_code; >> } exception; >> struct { >> @@ -873,9 +873,11 @@ struct kvm_vcpu_events { >> __u8 smm_inside_nmi; >> __u8 latched_init; >> } smi; >> + __u32 reserved[7]; >> + __u64 exception_payload; >> }; > > This changes the size of the struct, and thus the ioctl number. The > good part is that you don't need the capability, the bad part is that > you need a struct kvm_vcpu_events2 and ioctl KVM_GET/SET_VCPU_EVENTS2. It's not in the api.txt file, but there's an existing __u32 reserved[9] in arch/x86/include/uapi/asm/kvm.h.
On 09/10/2018 18:25, Jim Mattson wrote: >> This changes the size of the struct, and thus the ioctl number. The >> good part is that you don't need the capability, the bad part is that >> you need a struct kvm_vcpu_events2 and ioctl KVM_GET/SET_VCPU_EVENTS2. > It's not in the api.txt file, but there's an existing __u32 > reserved[9] in arch/x86/include/uapi/asm/kvm.h. Oh, good. :) Paolo
On 08/10/2018 20:29, Jim Mattson wrote: > Under nested virtualization, the L1 hypervisor may intercept an > exception raised during the execution of L2 before the exception > is delivered. When the intercepted exception is #PF, the VM-exit > to the L1 hypervisor precedes the modification of CR2. When the > intercepted exception is #DB, the VM-exit to the L1 hypervisor > precedes the modifications of DR6 and DR7 under VMX, but the > VM-exit to the L1 hypervisor follows the modifications of DR6 and > DR7 under SVM. > > At present, CR2 is modified too early under both VMX and SVM. DR6 is > modified too early under VMX. DR7 is modified at the appropriate time. > Unfortunately, it is possible to exit to userspace with one of these > exceptions pending, and userspace may rely on the premature > side-effects. It is also possible for userspace to inject one of these > exceptions, in which case, userspace will presumably have already > processed the side-effects. > > To address this problem, a new per-VM capability > (KVM_CAP_EXCEPTION_PAYLOAD) will be introduced. When this capability > is enabled by userspace, the faulting linear address will be included > with the information about a pending #PF in L2, and the "new DR6 bits" > will be included with the information about a pending #DB in L2. This > ancillary exception information is carried in a new "payload" field. 
> > Reported-by: Jim Mattson <jmattson@google.com> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com> > Signed-off-by: Jim Mattson <jmattson@google.com> > Reviewed-by: Peter Shier <pshier@google.com> > --- > Documentation/virtual/kvm/api.txt | 13 +++++++++++-- > arch/x86/include/asm/kvm_host.h | 3 +++ > arch/x86/include/uapi/asm/kvm.h | 6 ++++-- > arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++--- > tools/arch/x86/include/uapi/asm/kvm.h | 6 ++++-- > 5 files changed, 47 insertions(+), 9 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 647f94128a85..2df2cca81cf5 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -850,7 +850,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -873,9 +873,11 @@ struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > + __u32 reserved[7]; > + __u64 exception_payload; > }; > > -Only two fields are defined in the flags field: > +The following bits are defined in the flags field: > > - KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that > interrupt.shadow contains a valid state. > @@ -883,6 +885,10 @@ Only two fields are defined in the flags field: > - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that > smi contains a valid state. > > +- KVM_VCPUEVENT_VALID_PAYLOAD may be set in the flags field to signal that > + exception.has_payload and payload contain valid state > + (i.e. KVM_CAP_EXCEPTION_PAYLOAD is enabled). > + > ARM/ARM64: > > If the guest accesses a device that is being emulated by the host kernel in > @@ -961,6 +967,9 @@ shall be written into the VCPU. > > KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. > > +KVM_VCPUEVENT_VALID_PAYLOAD can only be set if KVM_CAP_EXCEPTION_PAYLOAD > +is enabled. 
> + > ARM/ARM64: > > Set the pending SError exception state for this VCPU. It is not possible to > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 09b2e3e2cf1b..026229a593f2 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -585,6 +585,8 @@ struct kvm_vcpu_arch { > bool has_error_code; > u8 nr; > u32 error_code; > + unsigned long payload; > + bool has_payload; > u8 nested_apf; > } exception; > > @@ -871,6 +873,7 @@ struct kvm_arch { > bool x2apic_broadcast_quirk_disabled; > > bool guest_can_read_msr_platform_info; > + bool exception_payload_enabled; > }; > > struct kvm_vm_stat { > diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h > index fd23d5778ea1..e3ea52bdd461 100644 > --- a/arch/x86/include/uapi/asm/kvm.h > +++ b/arch/x86/include/uapi/asm/kvm.h > @@ -288,6 +288,7 @@ struct kvm_reinject_control { > #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 > #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 > #define KVM_VCPUEVENT_VALID_SMM 0x00000008 > +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 > > /* Interrupt shadow states */ > #define KVM_X86_SHADOW_INT_MOV_SS 0x01 > @@ -299,7 +300,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -322,7 +323,8 @@ struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > - __u32 reserved[9]; > + __u32 reserved[7]; > + __u64 exception_payload; > }; > > /* for KVM_GET/SET_DEBUGREGS */ > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index edbf00ec56b3..dbc538d66505 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -431,6 +431,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, > vcpu->arch.exception.has_error_code = has_error; > vcpu->arch.exception.nr = nr; > vcpu->arch.exception.error_code = error_code; > + vcpu->arch.exception.has_payload = false; > 
+ vcpu->arch.exception.payload = 0; > return; > } > > @@ -455,6 +457,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, > vcpu->arch.exception.has_error_code = true; > vcpu->arch.exception.nr = DF_VECTOR; > vcpu->arch.exception.error_code = 0; > + vcpu->arch.exception.has_payload = false; > + vcpu->arch.exception.payload = 0; > } else > /* replace previous exception with a new one in a hope > that instruction re-execution will regenerate lost > @@ -3373,8 +3377,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, > !kvm_exception_is_soft(vcpu->arch.exception.nr); > events->exception.nr = vcpu->arch.exception.nr; > events->exception.has_error_code = vcpu->arch.exception.has_error_code; > - events->exception.pad = 0; > events->exception.error_code = vcpu->arch.exception.error_code; > + events->exception.has_payload = vcpu->arch.exception.has_payload; > + events->exception_payload = vcpu->arch.exception.payload; > > events->interrupt.injected = > vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; > @@ -3398,6 +3403,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, > events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING > | KVM_VCPUEVENT_VALID_SHADOW > | KVM_VCPUEVENT_VALID_SMM); > + if (vcpu->kvm->arch.exception_payload_enabled) > + events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; > memset(&events->reserved, 0, sizeof(events->reserved)); > } > > @@ -3409,12 +3416,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, > if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING > | KVM_VCPUEVENT_VALID_SIPI_VECTOR > | KVM_VCPUEVENT_VALID_SHADOW > - | KVM_VCPUEVENT_VALID_SMM)) > + | KVM_VCPUEVENT_VALID_SMM > + | KVM_VCPUEVENT_VALID_PAYLOAD)) > + return -EINVAL; > + > + if ((events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) && > + !vcpu->kvm->arch.exception_payload_enabled) > return -EINVAL; > > if (events->exception.injected && > (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || > - 
is_guest_mode(vcpu))) > + (is_guest_mode(vcpu) && > + !vcpu->kvm->arch.exception_payload_enabled))) > return -EINVAL; > > /* INITs are latched while in SMM */ > @@ -3429,6 +3442,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, > vcpu->arch.exception.nr = events->exception.nr; > vcpu->arch.exception.has_error_code = events->exception.has_error_code; > vcpu->arch.exception.error_code = events->exception.error_code; > + if (vcpu->kvm->arch.exception_payload_enabled) { > + vcpu->arch.exception.has_payload = events->exception.has_payload; > + vcpu->arch.exception.payload = events->exception_payload; > + } else { > + vcpu->arch.exception.has_payload = 0; > + vcpu->arch.exception.payload = 0; > + } > > vcpu->arch.interrupt.injected = events->interrupt.injected; > vcpu->arch.interrupt.nr = events->interrupt.nr; > @@ -9463,6 +9483,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, > vcpu->arch.exception.nr = 0; > vcpu->arch.exception.has_error_code = false; > vcpu->arch.exception.error_code = 0; > + vcpu->arch.exception.has_payload = false; > + vcpu->arch.exception.payload = 0; > } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { > fault.vector = PF_VECTOR; > fault.error_code_valid = true; > diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h > index 86299efa804a..c4e4f03ce436 100644 > --- a/tools/arch/x86/include/uapi/asm/kvm.h > +++ b/tools/arch/x86/include/uapi/asm/kvm.h > @@ -288,6 +288,7 @@ struct kvm_reinject_control { > #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 > #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 > #define KVM_VCPUEVENT_VALID_SMM 0x00000008 > +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 > > /* Interrupt shadow states */ > #define KVM_X86_SHADOW_INT_MOV_SS 0x01 > @@ -299,7 +300,7 @@ struct kvm_vcpu_events { > __u8 injected; > __u8 nr; > __u8 has_error_code; > - __u8 pad; > + __u8 has_payload; > __u32 error_code; > } exception; > struct { > @@ -322,7 +323,8 @@ 
struct kvm_vcpu_events { > __u8 smm_inside_nmi; > __u8 latched_init; > } smi; > - __u32 reserved[9]; > + __u32 reserved[7]; > + __u64 payload; > }; > > /* for KVM_GET/SET_DEBUGREGS */ > I'm not applying any of the patches yet, but I'd be happy to queue it for 4.20 even (slightly) after -rc1. Once this is done, I think we can flip nested=1. Paolo
> On 15 Oct 2018, at 20:07, Paolo Bonzini <pbonzini@redhat.com> wrote: > > On 08/10/2018 20:29, Jim Mattson wrote: >> Under nested virtualization, the L1 hypervisor may intercept an >> exception raised during the execution of L2 before the exception >> is delivered. When the intercepted exception is #PF, the VM-exit >> to the L1 hypervisor precedes the modification of CR2. When the >> intercepted exception is #DB, the VM-exit to the L1 hypervisor >> precedes the modifications of DR6 and DR7 under VMX, but the >> VM-exit to the L1 hypervisor follows the modifications of DR6 and >> DR7 under SVM. >> >> At present, CR2 is modified too early under both VMX and SVM. DR6 is >> modified too early under VMX. DR7 is modified at the appropriate time. >> Unfortunately, it is possible to exit to userspace with one of these >> exceptions pending, and userspace may rely on the premature >> side-effects. It is also possible for userspace to inject one of these >> exceptions, in which case, userspace will presumably have already >> processed the side-effects. >> >> To address this problem, a new per-VM capability >> (KVM_CAP_EXCEPTION_PAYLOAD) will be introduced. When this capability >> is enabled by userspace, the faulting linear address will be included >> with the information about a pending #PF in L2, and the "new DR6 bits" >> will be included with the information about a pending #DB in L2. This >> ancillary exception information is carried in a new "payload" field. >> >> Reported-by: Jim Mattson <jmattson@google.com> >> Suggested-by: Paolo Bonzini <pbonzini@redhat.com> >> Signed-off-by: Jim Mattson <jmattson@google.com> >> Reviewed-by: Peter Shier <pshier@google.com> >> > > I'm not applying any of the patches yet, but I'd be happy to queue it > for 4.20 even (slightly) after -rc1. Once this is done, I think we can > flip nested=1. > > Paolo Just out of curiosity, why is this specifically a decision-point for flipping nested=1? 
Because this is a break of userspace APIs in case of nVMX workloads? What are the conditions for flipping it? If that’s the case, this is also true for the get/set vCPU events IOCTLs. So they need to also be applied before the flip. Don’t get me wrong, I will be happy to see nested=1 by default. Just wondering… -Liran
On 15/10/2018 20:07, Liran Alon wrote: > Just out of curiosity, why is this specifically a decision-point for flipping nested=1? > Because this is a break of userspace APIs in case of nVMX workloads? > What are the conditions for flipping it? > > If that’s the case, this is also true for the get/set vCPU events IOCTLs. > So they need to also be applied before the flip. Yes, I meant v2 of the whole series. It is not strictly necessary to have this before the flip, but we're close enough that it makes sense to order that. Paolo
> On 15 Oct 2018, at 21:42, Paolo Bonzini <pbonzini@redhat.com> wrote: > > On 15/10/2018 20:07, Liran Alon wrote: >> Just out of curiosity, why is this specifically a decision-point for flipping nested=1? >> Because this is a break of userspace APIs in case of nVMX workloads? >> What are the conditions for flipping it? >> >> If that’s the case, this is also true for the get/set vCPU events IOCTLs. >> So they need to also be applied before the flip. > > Yes, I meant v2 of the whole series. It is not strictly necessary to > have this before the flip, but we're close enough that it makes sense to > order that. > > Paolo Kinda weird that this decision seems to have been taken so easily for AMD in commit: 4b6e4dca7011 ("KVM: SVM: enable nested svm by default") I highly doubt that KVM nSVM is really more stable than KVM nVMX… But maybe I’m wrong. -Liran
On 16/10/2018 01:01, Liran Alon wrote: >> Yes, I meant v2 of the whole series. It is not strictly necessary to >> have this before the flip, but we're close enough that it makes sense to >> order that. >> >> Paolo > Kinda weird that this decision seems to have been taken so easily for AMD in commit: > 4b6e4dca7011 ("KVM: SVM: enable nested svm by default") > I highly doubt that KVM nSVM is really more stable than KVM nVMX… > But maybe I’m wrong. No, indeed. That was not a good idea in retrospect. Paolo
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 647f94128a85..2df2cca81cf5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -850,7 +850,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 has_payload; __u32 error_code; } exception; struct { @@ -873,9 +873,11 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; + __u32 reserved[7]; + __u64 exception_payload; }; -Only two fields are defined in the flags field: +The following bits are defined in the flags field: - KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that interrupt.shadow contains a valid state. @@ -883,6 +885,10 @@ Only two fields are defined in the flags field: - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that smi contains a valid state. +- KVM_VCPUEVENT_VALID_PAYLOAD may be set in the flags field to signal that + exception.has_payload and payload contain valid state + (i.e. KVM_CAP_EXCEPTION_PAYLOAD is enabled). + ARM/ARM64: If the guest accesses a device that is being emulated by the host kernel in @@ -961,6 +967,9 @@ shall be written into the VCPU. KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. +KVM_VCPUEVENT_VALID_PAYLOAD can only be set if KVM_CAP_EXCEPTION_PAYLOAD +is enabled. + ARM/ARM64: Set the pending SError exception state for this VCPU. 
It is not possible to diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09b2e3e2cf1b..026229a593f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -585,6 +585,8 @@ struct kvm_vcpu_arch { bool has_error_code; u8 nr; u32 error_code; + unsigned long payload; + bool has_payload; u8 nested_apf; } exception; @@ -871,6 +873,7 @@ struct kvm_arch { bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; + bool exception_payload_enabled; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index fd23d5778ea1..e3ea52bdd461 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 has_payload; __u32 error_code; } exception; struct { @@ -322,7 +323,8 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u32 reserved[7]; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edbf00ec56b3..dbc538d66505 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -431,6 +431,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; vcpu->arch.exception.error_code = error_code; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; return; } @@ -455,6 +457,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = true; vcpu->arch.exception.nr = 
DF_VECTOR; vcpu->arch.exception.error_code = 0; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; } else /* replace previous exception with a new one in a hope that instruction re-execution will regenerate lost @@ -3373,8 +3377,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; - events->exception.pad = 0; events->exception.error_code = vcpu->arch.exception.error_code; + events->exception.has_payload = vcpu->arch.exception.has_payload; + events->exception_payload = vcpu->arch.exception.payload; events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; @@ -3398,6 +3403,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SHADOW | KVM_VCPUEVENT_VALID_SMM); + if (vcpu->kvm->arch.exception_payload_enabled) + events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -3409,12 +3416,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW - | KVM_VCPUEVENT_VALID_SMM)) + | KVM_VCPUEVENT_VALID_SMM + | KVM_VCPUEVENT_VALID_PAYLOAD)) + return -EINVAL; + + if ((events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) && + !vcpu->kvm->arch.exception_payload_enabled) return -EINVAL; if (events->exception.injected && (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || - is_guest_mode(vcpu))) + (is_guest_mode(vcpu) && + !vcpu->kvm->arch.exception_payload_enabled))) return -EINVAL; /* INITs are latched while in SMM */ @@ -3429,6 +3442,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.exception.nr = events->exception.nr; 
vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; + if (vcpu->kvm->arch.exception_payload_enabled) { + vcpu->arch.exception.has_payload = events->exception.has_payload; + vcpu->arch.exception.payload = events->exception_payload; + } else { + vcpu->arch.exception.has_payload = 0; + vcpu->arch.exception.payload = 0; + } vcpu->arch.interrupt.injected = events->interrupt.injected; vcpu->arch.interrupt.nr = events->interrupt.nr; @@ -9463,6 +9483,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, vcpu->arch.exception.nr = 0; vcpu->arch.exception.has_error_code = false; vcpu->arch.exception.error_code = 0; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { fault.vector = PF_VECTOR; fault.error_code_valid = true; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..c4e4f03ce436 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 has_payload; __u32 error_code; } exception; struct { @@ -322,7 +323,8 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u32 reserved[7]; + __u64 payload; }; /* for KVM_GET/SET_DEBUGREGS */