diff mbox

[-v5,1/2] QEMU-KVM: MCE: Add MCE simulation to qemu/tcg

Message ID 1244528895.8361.624.camel@yhuang-dev.sh.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Huang, Ying June 9, 2009, 6:28 a.m. UTC
- MCE features are initialized when VCPU is intialized according to CPUID.
- A monitor command "mce" is added to inject a MCE.
- A new interrupt mask: CPU_INTERRUPT_MCE is added to inject the MCE.


ChangeLog:

v5:

- Rebased on latest qemu-kvm.git

v4:

- Add savevm/loadvm support


Signed-off-by: Huang Ying <ying.huang@intel.com>

---
 cpu-all.h               |    4 ++
 cpu-exec.c              |    4 ++
 monitor.c               |   49 +++++++++++++++++++++++++++++++++
 target-i386/cpu.h       |   24 +++++++++++++++-
 target-i386/helper.c    |   70 ++++++++++++++++++++++++++++++++++++++++++++++++
 target-i386/machine.c   |   28 +++++++++++++++++++
 target-i386/op_helper.c |   34 +++++++++++++++++++++++
 7 files changed, 212 insertions(+), 1 deletion(-)



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -204,6 +204,7 @@ 
 #define CR4_DE_MASK   (1 << 3)
 #define CR4_PSE_MASK  (1 << 4)
 #define CR4_PAE_MASK  (1 << 5)
+#define CR4_MCE_MASK  (1 << 6)
 #define CR4_PGE_MASK  (1 << 7)
 #define CR4_PCE_MASK  (1 << 8)
 #define CR4_OSFXSR_SHIFT 9
@@ -250,6 +251,17 @@ 
 #define PG_ERROR_RSVD_MASK 0x08
 #define PG_ERROR_I_D_MASK  0x10
 
+#define MCG_CTL_P	(1UL<<8)   /* MCG_CAP register available */
+
+#define MCE_CAP_DEF	MCG_CTL_P
+#define MCE_BANKS_DEF	10
+
+#define MCG_STATUS_MCIP	(1UL<<2)   /* machine check in progress */
+
+#define MCI_STATUS_VAL	(1UL<<63)  /* valid error */
+#define MCI_STATUS_OVER	(1UL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC	(1UL<<61)  /* uncorrected error */
+
 #define MSR_IA32_TSC                    0x10
 #define MSR_IA32_APICBASE               0x1b
 #define MSR_IA32_APICBASE_BSP           (1<<8)
@@ -290,6 +302,11 @@ 
 
 #define MSR_MTRRdefType			0x2ff
 
+#define MSR_MC0_CTL			0x400
+#define MSR_MC0_STATUS			0x401
+#define MSR_MC0_ADDR			0x402
+#define MSR_MC0_MISC			0x403
+
 #define MSR_EFER                        0xc0000080
 
 #define MSR_EFER_SCE   (1 << 0)
@@ -676,6 +693,11 @@  typedef struct CPUX86State {
     /* in order to simplify APIC support, we leave this pointer to the
        user */
     struct APICState *apic_state;
+
+    uint64 mcg_cap;
+    uint64 mcg_status;
+    uint64 mcg_ctl;
+    uint64 *mce_banks;
 } CPUX86State;
 
 CPUX86State *cpu_x86_init(const char *cpu_model);
@@ -840,7 +862,7 @@  static inline int cpu_get_time_fast(void
 #define cpu_signal_handler cpu_x86_signal_handler
 #define cpu_list x86_cpu_list
 
-#define CPU_SAVE_VERSION 9
+#define CPU_SAVE_VERSION 10
 
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -3133,7 +3133,23 @@  void helper_wrmsr(void)
     case MSR_MTRRdefType:
         env->mtrr_deftype = val;
         break;
+    case MSR_MCG_STATUS:
+        env->mcg_status = val;
+        break;
+    case MSR_MCG_CTL:
+        if ((env->mcg_cap & MCG_CTL_P)
+            && (val == 0 || val == ~(uint64_t)0))
+            env->mcg_ctl = val;
+        break;
     default:
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            if ((offset & 0x3) != 0
+                || (val == 0 || val == ~(uint64_t)0))
+                env->mce_banks[offset] = val;
+            break;
+        }
         /* XXX: exception ? */
         break;
     }
@@ -3252,7 +3268,25 @@  void helper_rdmsr(void)
             /* XXX: exception ? */
             val = 0;
         break;
+    case MSR_MCG_CAP:
+        val = env->mcg_cap;
+        break;
+    case MSR_MCG_CTL:
+        if (env->mcg_cap & MCG_CTL_P)
+            val = env->mcg_ctl;
+        else
+            val = 0;
+        break;
+    case MSR_MCG_STATUS:
+        val = env->mcg_status;
+        break;
     default:
+        if ((uint32_t)ECX >= MSR_MC0_CTL
+            && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+            uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+            val = env->mce_banks[offset];
+            break;
+        }
         /* XXX: exception ? */
         val = 0;
         break;
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1435,6 +1435,75 @@  static void breakpoint_handler(CPUState 
 }
 #endif /* !CONFIG_USER_ONLY */
 
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+                        uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+    uint64_t mcg_cap = cenv->mcg_cap;
+    unsigned bank_num = mcg_cap & 0xff;
+    uint64_t *banks = cenv->mce_banks;
+
+    if (bank >= bank_num || !(status & MCI_STATUS_VAL))
+        return;
+
+    /*
+     * if MSR_MCG_CTL is not all 1s, the uncorrected error
+     * reporting is disabled
+     */
+    if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+        cenv->mcg_ctl != ~(uint64_t)0)
+        return;
+    banks += 4 * bank;
+    /*
+     * if MSR_MCi_CTL is not all 1s, the uncorrected error
+     * reporting is disabled for the bank
+     */
+    if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0)
+        return;
+    if (status & MCI_STATUS_UC) {
+        if ((cenv->mcg_status & MCG_STATUS_MCIP) ||
+            !(cenv->cr[4] & CR4_MCE_MASK)) {
+            fprintf(stderr, "injects mce exception while previous "
+                    "one is in progress!\n");
+            qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+            qemu_system_reset_request();
+            return;
+        }
+        if (banks[1] & MCI_STATUS_VAL)
+            status |= MCI_STATUS_OVER;
+        banks[2] = addr;
+        banks[3] = misc;
+        cenv->mcg_status = mcg_status;
+        banks[1] = status;
+        cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
+    } else if (!(banks[1] & MCI_STATUS_VAL)
+               || !(banks[1] & MCI_STATUS_UC)) {
+        if (banks[1] & MCI_STATUS_VAL)
+            status |= MCI_STATUS_OVER;
+        banks[2] = addr;
+        banks[3] = misc;
+        banks[1] = status;
+    } else
+        banks[1] |= MCI_STATUS_OVER;
+}
+
+static void mce_init(CPUX86State *cenv)
+{
+    unsigned int bank, bank_num;
+
+    if (((cenv->cpuid_version >> 8)&0xf) >= 6
+        && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) {
+        cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF;
+        cenv->mcg_ctl = ~(uint64_t)0;
+        bank_num = cenv->mcg_cap & 0xff;
+        cenv->mce_banks = qemu_mallocz(bank_num * sizeof(uint64_t) * 4);
+        for (bank = 0; bank < bank_num; bank++)
+            cenv->mce_banks[bank*4] = ~(uint64_t)0;
+    }
+}
+
 static void host_cpuid(uint32_t function, uint32_t count,
                        uint32_t *eax, uint32_t *ebx,
                        uint32_t *ecx, uint32_t *edx)
@@ -1694,6 +1763,7 @@  CPUX86State *cpu_x86_init(const char *cp
         cpu_x86_close(env);
         return NULL;
     }
+    mce_init(env);
     cpu_reset(env);
 #ifdef CONFIG_KQEMU
     kqemu_init(env);
--- a/monitor.c
+++ b/monitor.c
@@ -1682,6 +1682,28 @@  static void do_acl(Monitor *mon,
     }
 }
 
+#if defined(TARGET_I386)
+static void do_inject_mce(Monitor *mon,
+                          int cpu_index, int bank,
+                          unsigned status_hi, unsigned status_lo,
+                          unsigned mcg_status_hi, unsigned mcg_status_lo,
+                          unsigned addr_hi, unsigned addr_lo,
+                          unsigned misc_hi, unsigned misc_lo)
+{
+    CPUState *cenv;
+    uint64_t status = ((uint64_t)status_hi << 32) | status_lo;
+    uint64_t mcg_status = ((uint64_t)mcg_status_hi << 32) | mcg_status_lo;
+    uint64_t addr = ((uint64_t)addr_hi << 32) | addr_lo;
+    uint64_t misc = ((uint64_t)misc_hi << 32) | misc_lo;
+
+    for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu)
+        if (cenv->cpu_index == cpu_index && cenv->mcg_cap) {
+            cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+            break;
+        }
+}
+#endif
+
 /* Please update qemu-doc.texi when adding or changing commands */
 static const mon_cmd_t mon_cmds[] = {
     { "help|?", "s?", help_cmd,
@@ -1801,6 +1823,9 @@  static const mon_cmd_t mon_cmds[] = {
                                "acl deny vnc.username bob\n"
                                "acl reset vnc.username\n" },
     { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu state" },
+#if defined(TARGET_I386)
+    { "mce", "iillll", do_inject_mce, "cpu bank status mcgstatus addr misc", "inject a MCE on the given CPU"},
+#endif
     { NULL, NULL, },
 };
 
@@ -2546,6 +2571,15 @@  static void monitor_handle_command(Monit
                       void *arg3, void *arg4, void *arg5);
     void (*handler_7)(Monitor *mon, void *arg0, void *arg1, void *arg2,
                       void *arg3, void *arg4, void *arg5, void *arg6);
+    void (*handler_8)(Monitor *mon, void *arg0, void *arg1, void *arg2,
+                      void *arg3, void *arg4, void *arg5, void *arg6,
+                      void *arg7);
+    void (*handler_9)(Monitor *mon, void *arg0, void *arg1, void *arg2,
+                      void *arg3, void *arg4, void *arg5, void *arg6,
+                      void *arg7, void *arg8);
+    void (*handler_10)(Monitor *mon, void *arg0, void *arg1, void *arg2,
+                       void *arg3, void *arg4, void *arg5, void *arg6,
+                       void *arg7, void *arg8, void *arg9);
 
 #ifdef DEBUG
     monitor_printf(mon, "command='%s'\n", cmdline);
@@ -2843,6 +2877,21 @@  static void monitor_handle_command(Monit
         handler_7(mon, args[0], args[1], args[2], args[3], args[4], args[5],
                   args[6]);
         break;
+    case 8:
+        handler_8 = cmd->handler;
+        handler_8(mon, args[0], args[1], args[2], args[3], args[4], args[5],
+                  args[6], args[7]);
+        break;
+    case 9:
+        handler_9 = cmd->handler;
+        handler_9(mon, args[0], args[1], args[2], args[3], args[4], args[5],
+                  args[6], args[7], args[8]);
+        break;
+    case 10:
+        handler_10 = cmd->handler;
+        handler_10(mon, args[0], args[1], args[2], args[3], args[4], args[5],
+                   args[6], args[7], args[8], args[9]);
+        break;
     default:
         monitor_printf(mon, "unsupported number of arguments: %d\n", nb_args);
         goto fail;
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -765,6 +765,7 @@  extern int use_icount;
 #define CPU_INTERRUPT_DEBUG  0x80 /* Debug event occured.  */
 #define CPU_INTERRUPT_VIRQ   0x100 /* virtual interrupt pending.  */
 #define CPU_INTERRUPT_NMI    0x200 /* NMI pending. */
+#define CPU_INTERRUPT_MCE    0x400 /* (x86 only) MCE pending. */
 
 void cpu_interrupt(CPUState *s, int mask);
 void cpu_reset_interrupt(CPUState *env, int mask);
@@ -1067,4 +1068,7 @@  extern int64_t kqemu_ret_excp_count;
 extern int64_t kqemu_ret_intr_count;
 #endif
 
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+                        uint64_t mcg_status, uint64_t addr, uint64_t misc);
+
 #endif /* CPU_ALL_H */
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -400,6 +400,10 @@  int cpu_exec(CPUState *env1)
                             env->hflags2 |= HF2_NMI_MASK;
                             do_interrupt(EXCP02_NMI, 0, 0, 0, 1);
                             next_tb = 0;
+			} else if (interrupt_request & CPU_INTERRUPT_MCE) {
+                            env->interrupt_request &= ~CPU_INTERRUPT_MCE;
+                            do_interrupt(EXCP12_MCHK, 0, 0, 0, 0);
+                            next_tb = 0;
                         } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
                                    (((env->hflags2 & HF2_VINTR_MASK) && 
                                      (env->hflags2 & HF2_HIF_MASK)) ||
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -160,6 +160,19 @@  void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_sbe32s(f, &pending_irq);
     qemu_put_be32s(f, &env->mp_state);
     qemu_put_be64s(f, &env->tsc);
+
+    /* MCE */
+    qemu_put_be64s(f, &env->mcg_cap);
+    if (env->mcg_cap) {
+        qemu_put_be64s(f, &env->mcg_status);
+        qemu_put_be64s(f, &env->mcg_ctl);
+        for (i = 0; i < (env->mcg_cap & 0xff); i++) {
+            qemu_put_be64s(f, &env->mce_banks[4*i]);
+            qemu_put_be64s(f, &env->mce_banks[4*i + 1]);
+            qemu_put_be64s(f, &env->mce_banks[4*i + 2]);
+            qemu_put_be64s(f, &env->mce_banks[4*i + 3]);
+        }
+    }
 }
 
 #ifdef USE_X86LDOUBLE
@@ -377,5 +390,20 @@  int cpu_load(QEMUFile *f, void *opaque, 
             kvm_load_mpstate(env);
         }
     }
+
+    if (version_id >= 10) {
+        qemu_get_be64s(f, &env->mcg_cap);
+        if (env->mcg_cap) {
+            qemu_get_be64s(f, &env->mcg_status);
+            qemu_get_be64s(f, &env->mcg_ctl);
+            for (i = 0; i < (env->mcg_cap & 0xff); i++) {
+                qemu_get_be64s(f, &env->mce_banks[4*i]);
+                qemu_get_be64s(f, &env->mce_banks[4*i + 1]);
+                qemu_get_be64s(f, &env->mce_banks[4*i + 2]);
+                qemu_get_be64s(f, &env->mce_banks[4*i + 3]);
+            }
+        }
+    }
+
     return 0;
 }