@@ -4774,7 +4774,11 @@ static void x86_cpu_reset(CPUState *s)
memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed));
env->interrupt_injected = -1;
- env->exception_injected = -1;
+ env->exception_nr = -1;
+ env->exception_pending = 0;
+ env->exception_injected = 0;
+ env->exception_has_payload = false;
+ env->exception_payload = 0;
env->nmi_injected = false;
#if !defined(CONFIG_USER_ONLY)
/* We hard-wire the BSP to the first CPU. */
@@ -1338,10 +1338,14 @@ typedef struct CPUX86State {
/* For KVM */
uint32_t mp_state;
- int32_t exception_injected;
+ int32_t exception_nr;
int32_t interrupt_injected;
uint8_t soft_interrupt;
+ uint8_t exception_pending;
+ uint8_t exception_injected;
uint8_t has_error_code;
+ uint8_t exception_has_payload;
+ uint64_t exception_payload;
uint32_t ins_len;
uint32_t sipi_vector;
bool tsc_valid;
@@ -605,7 +605,9 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
X86CPU *x86_cpu = X86_CPU(cpu);
CPUX86State *env = &x86_cpu->env;
- env->exception_injected = -1;
+ env->exception_nr = -1;
+ env->exception_pending = 0;
+ env->exception_injected = 0;
env->interrupt_injected = -1;
env->nmi_injected = false;
if (idtvec_info & VMCS_IDT_VEC_VALID) {
@@ -619,7 +621,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
break;
case VMCS_IDT_VEC_HWEXCEPTION:
case VMCS_IDT_VEC_SWEXCEPTION:
- env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
+ env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
+ env->exception_injected = 1;
break;
case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
default:
@@ -912,7 +915,8 @@ int hvf_vcpu_exec(CPUState *cpu)
macvm_set_rip(cpu, rip + ins_len);
break;
case VMX_REASON_VMCALL:
- env->exception_injected = EXCP0D_GPF;
+ env->exception_nr = EXCP0D_GPF;
+ env->exception_injected = 1;
env->has_error_code = true;
env->error_code = 0;
break;
@@ -362,8 +362,8 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
if (env->interrupt_injected != -1) {
vector = env->interrupt_injected;
intr_type = VMCS_INTR_T_SWINTR;
- } else if (env->exception_injected != -1) {
- vector = env->exception_injected;
+ } else if (env->exception_nr != -1) {
+ vector = env->exception_nr;
if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
intr_type = VMCS_INTR_T_SWEXCEPTION;
} else {
@@ -104,6 +104,7 @@ static uint32_t num_architectural_pmu_fixed_counters;
static int has_xsave;
static int has_xcrs;
static int has_pit_state2;
+static int has_exception_payload;
static bool has_msr_mcg_ext_ctl;
@@ -584,15 +585,56 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
/* Hope we are lucky for AO MCE */
}
+static void kvm_reset_exception(CPUX86State *env)
+{
+ env->exception_nr = -1;
+ env->exception_pending = 0;
+ env->exception_injected = 0;
+ env->exception_has_payload = false;
+ env->exception_payload = 0;
+}
+
+static void kvm_queue_exception(CPUX86State *env,
+ int32_t exception_nr,
+ uint8_t exception_has_payload,
+ uint64_t exception_payload)
+{
+ assert(env->exception_nr == -1);
+ assert(!env->exception_pending);
+ assert(!env->exception_injected);
+ assert(!env->exception_has_payload);
+
+ env->exception_nr = exception_nr;
+
+ if (has_exception_payload) {
+ env->exception_pending = 1;
+
+ env->exception_has_payload = exception_has_payload;
+ env->exception_payload = exception_payload;
+ } else {
+ env->exception_injected = 1;
+
+ if (exception_nr == EXCP01_DB) {
+ assert(exception_has_payload);
+ env->dr[6] = exception_payload;
+ } else if (exception_nr == EXCP0E_PAGE) {
+ assert(exception_has_payload);
+ env->cr[2] = exception_payload;
+ } else {
+ assert(!exception_has_payload);
+ }
+ }
+}
+
static int kvm_inject_mce_oldstyle(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
- if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) {
+ if (!kvm_has_vcpu_events() && env->exception_nr == EXCP12_MCHK) {
unsigned int bank, bank_num = env->mcg_cap & 0xff;
struct kvm_x86_mce mce;
- env->exception_injected = -1;
+ kvm_reset_exception(env);
/*
* There must be at least one bank in use if an MCE is pending.
@@ -1613,6 +1655,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);
+ has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD);
+ if (has_exception_payload) {
+ ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true);
+ if (ret < 0) {
+ error_report("kvm: Failed to enable exception payload cap: %s",
+ strerror(-ret));
+ return ret;
+ }
+ }
+
ret = kvm_get_supported_msrs(s);
if (ret < 0) {
return ret;
@@ -2917,8 +2969,16 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
return 0;
}
- events.exception.injected = (env->exception_injected >= 0);
- events.exception.nr = env->exception_injected;
+ events.flags = 0;
+
+ if (has_exception_payload) {
+ events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
+ events.exception.pending = env->exception_pending;
+ events.exception_has_payload = env->exception_has_payload;
+ events.exception_payload = env->exception_payload;
+ }
+ events.exception.nr = env->exception_nr;
+ events.exception.injected = env->exception_injected;
events.exception.has_error_code = env->has_error_code;
events.exception.error_code = env->error_code;
@@ -2931,7 +2991,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
events.sipi_vector = env->sipi_vector;
- events.flags = 0;
if (has_msr_smbase) {
events.smi.smm = !!(env->hflags & HF_SMM_MASK);
@@ -2981,8 +3040,19 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
if (ret < 0) {
return ret;
}
- env->exception_injected =
- events.exception.injected ? events.exception.nr : -1;
+
+ if (events.flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
+ env->exception_pending = events.exception.pending;
+ env->exception_has_payload = events.exception_has_payload;
+ env->exception_payload = events.exception_payload;
+ } else {
+ env->exception_pending = 0;
+ env->exception_has_payload = false;
+ }
+ env->exception_injected = events.exception.injected;
+ env->exception_nr =
+ (env->exception_pending || env->exception_injected) ?
+ events.exception.nr : -1;
env->has_error_code = events.exception.has_error_code;
env->error_code = events.exception.error_code;
@@ -3034,12 +3104,12 @@ static int kvm_guest_debug_workarounds(X86CPU *cpu)
unsigned long reinject_trap = 0;
if (!kvm_has_vcpu_events()) {
- if (env->exception_injected == EXCP01_DB) {
+ if (env->exception_nr == EXCP01_DB) {
reinject_trap = KVM_GUESTDBG_INJECT_DB;
} else if (env->exception_injected == EXCP03_INT3) {
reinject_trap = KVM_GUESTDBG_INJECT_BP;
}
- env->exception_injected = -1;
+ kvm_reset_exception(env);
}
/*
@@ -3415,13 +3485,13 @@ int kvm_arch_process_async_events(CPUState *cs)
kvm_cpu_synchronize_state(cs);
- if (env->exception_injected == EXCP08_DBLE) {
+ if (env->exception_nr == EXCP08_DBLE) {
/* this means triple fault */
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
cs->exit_request = 1;
return 0;
}
- env->exception_injected = EXCP12_MCHK;
+ kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
env->has_error_code = 0;
cs->halted = 0;
@@ -3636,14 +3706,13 @@ static int kvm_handle_debug(X86CPU *cpu,
}
if (ret == 0) {
cpu_synchronize_state(cs);
- assert(env->exception_injected == -1);
+ assert(env->exception_nr == -1);
/* pass to guest */
- env->exception_injected = arch_info->exception;
+ kvm_queue_exception(env, arch_info->exception,
+ arch_info->exception == EXCP01_DB,
+ arch_info->dr6);
env->has_error_code = 0;
- if (arch_info->exception == EXCP01_DB) {
- env->dr[6] = arch_info->dr6;
- }
}
return ret;
@@ -240,6 +240,41 @@ static int cpu_pre_save(void *opaque)
}
#endif
+ /*
+ * When vCPU is running L2 and exception is still pending,
+ * it can potentially be intercepted by L1 hypervisor.
+ * In contrast to an injected exception which cannot be
+ * intercepted anymore.
+ *
+ * Furthermore, when a L2 exception is intercepted by L1
+ * hypervisor, it's exception payload (CR2/DR6 on #PF/#DB)
+ * should not be set yet in the respective vCPU register.
+ * Thus, in case an exception is pending, it is
+ * important to save the exception payload seperately.
+ *
+ * Therefore, if an exception is not in a pending state
+ * or vCPU is not in guest-mode, it is not important to
+ * distinguish between a pending and injected exception
+ * and we don't need to store seperately the exception payload.
+ *
+ * In order to preserve better backwards-compatabile migration,
+ * convert a pending exception to an injected exception in
+ * case it is not important to distingiush between them
+ * as described above.
+ */
+ if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) {
+ env->exception_pending = 0;
+ env->exception_injected = 1;
+
+ if (env->exception_has_payload) {
+ if (env->exception_nr == EXCP01_DB) {
+ env->dr[6] = env->exception_payload;
+ } else if (env->exception_nr == EXCP0E_PAGE) {
+ env->cr[2] = env->exception_payload;
+ }
+ }
+ }
+
return 0;
}
@@ -297,6 +332,23 @@ static int cpu_post_load(void *opaque, int version_id)
}
#endif
+ /*
+ * There are cases that we can get valid exception_nr with both
+ * exception_pending and exception_injected being cleared.
+ * This can happen in one of the following scenarios:
+ * 1) Source is older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support.
+ * 2) Source is running on kernel without KVM_CAP_EXCEPTION_PAYLOAD support.
+ * 3) "cpu/exception_info" subsection not sent because there is no exception
+ * pending or guest wasn't running L2 (See comment in cpu_pre_save()).
+ *
+ * In those cases, we can just deduce that a valid exception_nr means
+ * we can treat the exception as already injected.
+ */
+ if ((env->exception_nr != -1) &&
+ !env->exception_pending && !env->exception_injected) {
+ env->exception_injected = 1;
+ }
+
env->fpstt = (env->fpus_vmstate >> 11) & 7;
env->fpus = env->fpus_vmstate & ~0x3800;
env->fptag_vmstate ^= 0xff;
@@ -342,6 +394,35 @@ static bool steal_time_msr_needed(void *opaque)
return cpu->env.steal_time_msr != 0;
}
+static bool exception_info_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ /*
+ * It is important to save exception-info only in case
+ * we need to distingiush between a pending and injected
+ * exception. Which is only required in case there is a
+ * pending exception and vCPU is running L2.
+ * For more info, refer to comment in cpu_pre_save().
+ */
+ return (env->exception_pending && (env->hflags & HF_GUEST_MASK));
+}
+
+static const VMStateDescription vmstate_exception_info = {
+ .name = "cpu/exception_info",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = exception_info_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8(env.exception_pending, X86CPU),
+ VMSTATE_UINT8(env.exception_injected, X86CPU),
+ VMSTATE_UINT8(env.exception_has_payload, X86CPU),
+ VMSTATE_UINT64(env.exception_payload, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_steal_time_msr = {
.name = "cpu/steal_time_msr",
.version_id = 1,
@@ -1230,7 +1311,7 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_INT32(env.interrupt_injected, X86CPU),
VMSTATE_UINT32(env.mp_state, X86CPU),
VMSTATE_UINT64(env.tsc, X86CPU),
- VMSTATE_INT32(env.exception_injected, X86CPU),
+ VMSTATE_INT32(env.exception_nr, X86CPU),
VMSTATE_UINT8(env.soft_interrupt, X86CPU),
VMSTATE_UINT8(env.nmi_injected, X86CPU),
VMSTATE_UINT8(env.nmi_pending, X86CPU),
@@ -1254,6 +1335,7 @@ VMStateDescription vmstate_x86_cpu = {
/* The above list is not sorted /wrt version numbers, watch out! */
},
.subsections = (const VMStateDescription*[]) {
+ &vmstate_exception_info,
&vmstate_async_pf_msr,
&vmstate_pv_eoi_msr,
&vmstate_steal_time_msr,