diff mbox

[-v3] KVM: MCE: Add MCE support to KVM

Message ID 1242031695.8213.17.camel@yhuang-dev.sh.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Huang Ying May 11, 2009, 8:48 a.m. UTC
The related MSRs are emulated. MCE capability is exported via
extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
such as the mcg_cap. MCE is injected via vcpu ioctl command
KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
not implemented.


ChangeLog:

v3:

- Fix KVM_CAP_MCE declaration

v2:

- Add MCE capability exportation support.
- Allocate MCE banks registers simulation backing memory during VCPU
  initialization.


Signed-off-by: Huang Ying <ying.huang@intel.com>

---
 arch/x86/include/asm/kvm.h      |    1 
 arch/x86/include/asm/kvm_host.h |    5 
 arch/x86/kvm/x86.c              |  220 +++++++++++++++++++++++++++++++++++-----
 include/linux/kvm.h             |   21 +++
 4 files changed, 223 insertions(+), 24 deletions(-)

Comments

Jaswinder Singh Rajput May 11, 2009, 9:05 a.m. UTC | #1
On Mon, 2009-05-11 at 16:48 +0800, Huang Ying wrote:
> +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> +{
> +	u64 data;
> +
> +	switch (msr) {
> +	case 0xc0010010: /* SYSCFG */
> +	case 0xc0010015: /* HWCR */

You can replace this with :

+	case MSR_K8_SYSCFG:
+	case MSR_K8_HWCR:

--
JSR


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Huang Ying May 11, 2009, 9:08 a.m. UTC | #2
On Mon, 2009-05-11 at 17:05 +0800, Jaswinder Singh Rajput wrote:
> On Mon, 2009-05-11 at 16:48 +0800, Huang Ying wrote:
> > +int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> > +{
> > +	u64 data;
> > +
> > +	switch (msr) {
> > +	case 0xc0010010: /* SYSCFG */
> > +	case 0xc0010015: /* HWCR */
> 
> You can replace this with :
> 
> +	case MSR_K8_SYSCFG:
> +	case MSR_K8_HWCR:
> 

I do not change this actually. So I think it should be changed in
another patch.

Best Regards,
Huang Ying
Avi Kivity May 17, 2009, 6:56 p.m. UTC | #3
Huang Ying wrote:
> The related MSRs are emulated. MCE capability is exported via
> extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
> vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
> such as the mcg_cap. MCE is injected via vcpu ioctl command
> KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
> not implemented.
>
>   

Applied, thanks.
Jan Kiszka May 25, 2009, 5:40 p.m. UTC | #4
Avi Kivity wrote:
> Huang Ying wrote:
>> The related MSRs are emulated. MCE capability is exported via
>> extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
>> vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
>> such as the mcg_cap. MCE is injected via vcpu ioctl command
>> KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
>> not implemented.
>>
>>   
> 
> Applied, thanks.
> 

This patch breaks kvm build for 32-bit hosts.

Is the KVM MCE interface completely or only partially limited to x86-64,
ie. can I completely #ifdef it away on 32-bit hosts (including
KVM_CAP_MCE) or is this more complicated?

Jan
Avi Kivity May 25, 2009, 6:09 p.m. UTC | #5
Jan Kiszka wrote:
> Avi Kivity wrote:
>   
>> Huang Ying wrote:
>>     
>>> The related MSRs are emulated. MCE capability is exported via
>>> extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
>>> vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
>>> such as the mcg_cap. MCE is injected via vcpu ioctl command
>>> KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
>>> not implemented.
>>>
>>>   
>>>       
>> Applied, thanks.
>>
>>     
>
> This patch breaks kvm build for 32-bit hosts.
>   

Strange, I build-test on i386.  What's your failure?

> Is the KVM MCE interface completely or only partially limited to x86-64,
> ie. can I completely #ifdef it away on 32-bit hosts (including
> KVM_CAP_MCE) or is this more complicated?
>   

I don't see any reason to limit it to x86_64?
Jan Kiszka May 25, 2009, 6:15 p.m. UTC | #6
Avi Kivity wrote:
> Jan Kiszka wrote:
>> Avi Kivity wrote:
>>  
>>> Huang Ying wrote:
>>>    
>>>> The related MSRs are emulated. MCE capability is exported via
>>>> extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
>>>> vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
>>>> such as the mcg_cap. MCE is injected via vcpu ioctl command
>>>> KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
>>>> not implemented.
>>>>
>>>>         
>>> Applied, thanks.
>>>
>>>     
>>
>> This patch breaks kvm build for 32-bit hosts.
>>   
> 
> Strange, I build-test on i386.  What's your failure?

This is against some 2.6.28 kernel:

  CC [M]  /data/kvm-kmod/x86/x86.o
/data/kvm-kmod/x86/x86.c: In function ‘set_msr_mce’:
/data/kvm-kmod/x86/x86.c:794: error: ‘MCG_CTL_P’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c:794: error: (Each undeclared identifier is reported only once
/data/kvm-kmod/x86/x86.c:794: error: for each function it appears in.)
/data/kvm-kmod/x86/x86.c: In function ‘get_msr_mce’:
/data/kvm-kmod/x86/x86.c:950: error: ‘MCG_CTL_P’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c: In function ‘kvm_arch_dev_ioctl’:
/data/kvm-kmod/x86/x86.c:1216: error: ‘MCG_CTL_P’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c: In function ‘kvm_vcpu_ioctl_x86_setup_mce’:
/data/kvm-kmod/x86/x86.c:1592: error: ‘MCG_CTL_P’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c: In function ‘kvm_vcpu_ioctl_x86_set_mce’:
/data/kvm-kmod/x86/x86.c:1613: error: ‘MCI_STATUS_VAL’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c:1619: error: ‘MCI_STATUS_UC’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c:1619: error: ‘MCG_CTL_P’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c:1630: error: ‘MCG_STATUS_MCIP’ undeclared (first use in this function)
/data/kvm-kmod/x86/x86.c:1639: error: ‘MCI_STATUS_OVER’ undeclared (first use in this function)

> 
>> Is the KVM MCE interface completely or only partially limited to x86-64,
>> ie. can I completely #ifdef it away on 32-bit hosts (including
>> KVM_CAP_MCE) or is this more complicated?
>>   
> 
> I don't see any reason to limit it to x86_64?
> 

Well, if I look at the definition of MCI_STATUS_VAL as (1UL<<63),
something tells me: "Hey, only use me on 64-bit hosts!" But I have no
clue about details of this stuff, and from a second glance at it is
seems to include at least some parts that are valid on 32-bit as well.

Jan
Jan Kiszka May 25, 2009, 6:20 p.m. UTC | #7
Jan Kiszka wrote:
> Avi Kivity wrote:
>> Jan Kiszka wrote:
>>> Avi Kivity wrote:
>>>  
>>>> Huang Ying wrote:
>>>>    
>>>>> The related MSRs are emulated. MCE capability is exported via
>>>>> extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
>>>>> vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
>>>>> such as the mcg_cap. MCE is injected via vcpu ioctl command
>>>>> KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
>>>>> not implemented.
>>>>>
>>>>>         
>>>> Applied, thanks.
>>>>
>>>>     
>>> This patch breaks kvm build for 32-bit hosts.
>>>   
>> Strange, I build-test on i386.  What's your failure?
> 
> This is against some 2.6.28 kernel:
> 
>   CC [M]  /data/kvm-kmod/x86/x86.o
> /data/kvm-kmod/x86/x86.c: In function ‘set_msr_mce’:
> /data/kvm-kmod/x86/x86.c:794: error: ‘MCG_CTL_P’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c:794: error: (Each undeclared identifier is reported only once
> /data/kvm-kmod/x86/x86.c:794: error: for each function it appears in.)
> /data/kvm-kmod/x86/x86.c: In function ‘get_msr_mce’:
> /data/kvm-kmod/x86/x86.c:950: error: ‘MCG_CTL_P’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c: In function ‘kvm_arch_dev_ioctl’:
> /data/kvm-kmod/x86/x86.c:1216: error: ‘MCG_CTL_P’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c: In function ‘kvm_vcpu_ioctl_x86_setup_mce’:
> /data/kvm-kmod/x86/x86.c:1592: error: ‘MCG_CTL_P’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c: In function ‘kvm_vcpu_ioctl_x86_set_mce’:
> /data/kvm-kmod/x86/x86.c:1613: error: ‘MCI_STATUS_VAL’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c:1619: error: ‘MCI_STATUS_UC’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c:1619: error: ‘MCG_CTL_P’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c:1630: error: ‘MCG_STATUS_MCIP’ undeclared (first use in this function)
> /data/kvm-kmod/x86/x86.c:1639: error: ‘MCI_STATUS_OVER’ undeclared (first use in this function)
> 
>>> Is the KVM MCE interface completely or only partially limited to x86-64,
>>> ie. can I completely #ifdef it away on 32-bit hosts (including
>>> KVM_CAP_MCE) or is this more complicated?
>>>   
>> I don't see any reason to limit it to x86_64?
>>
> 
> Well, if I look at the definition of MCI_STATUS_VAL as (1UL<<63),
> something tells me: "Hey, only use me on 64-bit hosts!" But I have no
> clue about details of this stuff, and from a second glance at it is
> seems to include at least some parts that are valid on 32-bit as well.

Ah, interesting: /someone/ removed the #ifdef __x86_64__ from
arch/x86/include/asm/mce.h, but that's not mainline yet...

Jan
Avi Kivity May 26, 2009, 8:11 a.m. UTC | #8
Jan Kiszka wrote:
>> Well, if I look at the definition of MCI_STATUS_VAL as (1UL<<63),
>> something tells me: "Hey, only use me on 64-bit hosts!" But I have no
>> clue about details of this stuff, and from a second glance at it is
>> seems to include at least some parts that are valid on 32-bit as well.
>>     
>
> Ah, interesting: /someone/ removed the #ifdef __x86_64__ from
> arch/x86/include/asm/mce.h, but that's not mainline yet...
>   

Well that someone missed the MCI_STATUS_VAL bit.  Do you see anything 
else fishy in there?
diff mbox

Patch

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@ 
 #include <asm/msr.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
+#include <asm/mce.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
@@ -55,6 +56,10 @@ 
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+#define KVM_MAX_MCE_BANKS 32
+#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
@@ -737,23 +742,43 @@  static int set_msr_mtrr(struct kvm_vcpu 
 	return 0;
 }
 
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	unsigned bank_num = mcg_cap & 0xff;
+
 	switch (msr) {
-	case MSR_EFER:
-		set_efer(vcpu, data);
-		break;
-	case MSR_IA32_MC0_STATUS:
-		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-		       __func__, data);
-		break;
 	case MSR_IA32_MCG_STATUS:
-		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-			__func__, data);
+		vcpu->arch.mcg_status = data;
 		break;
 	case MSR_IA32_MCG_CTL:
-		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
-			__func__, data);
+		if (!(mcg_cap & MCG_CTL_P))
+			return 1;
+		if (data != 0 && data != ~(u64)0)
+			return -1;
+		vcpu->arch.mcg_ctl = data;
+		break;
+	default:
+		if (msr >= MSR_IA32_MC0_CTL &&
+		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+			u32 offset = msr - MSR_IA32_MC0_CTL;
+			/* only 0 or all 1s can be written to IA32_MCi_CTL */
+			if ((offset & 0x3) == 0 &&
+			    data != 0 && data != ~(u64)0)
+				return -1;
+			vcpu->arch.mce_banks[offset] = data;
+			break;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	switch (msr) {
+	case MSR_EFER:
+		set_efer(vcpu, data);
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		if (!data) {
@@ -809,6 +834,10 @@  int kvm_set_msr_common(struct kvm_vcpu *
 		kvm_request_guest_time_update(vcpu);
 		break;
 	}
+	case MSR_IA32_MCG_CTL:
+	case MSR_IA32_MCG_STATUS:
+	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+		return set_msr_mce(vcpu, msr, data);
 	default:
 		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
 		return 1;
@@ -864,26 +893,49 @@  static int get_msr_mtrr(struct kvm_vcpu 
 	return 0;
 }
 
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 data;
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	unsigned bank_num = mcg_cap & 0xff;
 
 	switch (msr) {
-	case 0xc0010010: /* SYSCFG */
-	case 0xc0010015: /* HWCR */
-	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
-	case MSR_IA32_MC0_CTL:
-	case MSR_IA32_MCG_STATUS:
+		data = 0;
+		break;
 	case MSR_IA32_MCG_CAP:
+		data = vcpu->arch.mcg_cap;
+		break;
 	case MSR_IA32_MCG_CTL:
-	case MSR_IA32_MC0_MISC:
-	case MSR_IA32_MC0_MISC+4:
-	case MSR_IA32_MC0_MISC+8:
-	case MSR_IA32_MC0_MISC+12:
-	case MSR_IA32_MC0_MISC+16:
-	case MSR_IA32_MC0_MISC+20:
+		if (!(mcg_cap & MCG_CTL_P))
+			return 1;
+		data = vcpu->arch.mcg_ctl;
+		break;
+	case MSR_IA32_MCG_STATUS:
+		data = vcpu->arch.mcg_status;
+		break;
+	default:
+		if (msr >= MSR_IA32_MC0_CTL &&
+		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+			u32 offset = msr - MSR_IA32_MC0_CTL;
+			data = vcpu->arch.mce_banks[offset];
+			break;
+		}
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data;
+
+	switch (msr) {
+	case 0xc0010010: /* SYSCFG */
+	case 0xc0010015: /* HWCR */
+	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_EBL_CR_POWERON:
 	case MSR_IA32_DEBUGCTLMSR:
@@ -925,6 +977,13 @@  int kvm_get_msr_common(struct kvm_vcpu *
 	case MSR_KVM_SYSTEM_TIME:
 		data = vcpu->arch.time;
 		break;
+	case MSR_IA32_P5_MC_ADDR:
+	case MSR_IA32_P5_MC_TYPE:
+	case MSR_IA32_MCG_CAP:
+	case MSR_IA32_MCG_CTL:
+	case MSR_IA32_MCG_STATUS:
+	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+		return get_msr_mce(vcpu, msr, pdata);
 	default:
 		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -1046,6 +1105,9 @@  int kvm_dev_ioctl_check_extension(long e
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		break;
+	case KVM_CAP_MCE:
+		r = KVM_MAX_MCE_BANKS;
+		break;
 	default:
 		r = 0;
 		break;
@@ -1106,6 +1168,16 @@  long kvm_arch_dev_ioctl(struct file *fil
 		r = 0;
 		break;
 	}
+	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
+		u64 mce_cap;
+
+		mce_cap = KVM_MCE_CAP_SUPPORTED;
+		r = -EFAULT;
+		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -1463,6 +1535,80 @@  static int vcpu_ioctl_tpr_access_reporti
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
+					u64 mcg_cap)
+{
+	int r;
+	unsigned bank_num = mcg_cap & 0xff, bank;
+
+	r = -EINVAL;
+	if (!bank_num)
+		goto out;
+	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+		goto out;
+	r = 0;
+	vcpu->arch.mcg_cap = mcg_cap;
+	/* Init IA32_MCG_CTL to all 1s */
+	if (mcg_cap & MCG_CTL_P)
+		vcpu->arch.mcg_ctl = ~(u64)0;
+	/* Init IA32_MCi_CTL to all 1s */
+	for (bank = 0; bank < bank_num; bank++)
+		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+out:
+	return r;
+}
+
+static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
+				      struct kvm_x86_mce *mce)
+{
+	u64 mcg_cap = vcpu->arch.mcg_cap;
+	unsigned bank_num = mcg_cap & 0xff;
+	u64 *banks = vcpu->arch.mce_banks;
+
+	if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
+		return -EINVAL;
+	/*
+	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
+	 * reporting is disabled
+	 */
+	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+	    vcpu->arch.mcg_ctl != ~(u64)0)
+		return 0;
+	banks += 4 * mce->bank;
+	/*
+	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
+	 * reporting is disabled for the bank
+	 */
+	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
+		return 0;
+	if (mce->status & MCI_STATUS_UC) {
+		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
+		    !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+			printk(KERN_DEBUG "kvm: set_mce: "
+			       "injects mce exception while "
+			       "previous one is in progress!\n");
+			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+			return 0;
+		}
+		if (banks[1] & MCI_STATUS_VAL)
+			mce->status |= MCI_STATUS_OVER;
+		banks[2] = mce->addr;
+		banks[3] = mce->misc;
+		vcpu->arch.mcg_status = mce->mcg_status;
+		banks[1] = mce->status;
+		kvm_queue_exception(vcpu, MC_VECTOR);
+	} else if (!(banks[1] & MCI_STATUS_VAL)
+		   || !(banks[1] & MCI_STATUS_UC)) {
+		if (banks[1] & MCI_STATUS_VAL)
+			mce->status |= MCI_STATUS_OVER;
+		banks[2] = mce->addr;
+		banks[3] = mce->misc;
+		banks[1] = mce->status;
+	} else
+		banks[1] |= MCI_STATUS_OVER;
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -1596,6 +1742,24 @@  long kvm_arch_vcpu_ioctl(struct file *fi
 		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
 		break;
 	}
+	case KVM_X86_SETUP_MCE: {
+		u64 mcg_cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+			goto out;
+		r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
+		break;
+	}
+	case KVM_X86_SET_MCE: {
+		struct kvm_x86_mce mce;
+
+		r = -EFAULT;
+		if (copy_from_user(&mce, argp, sizeof mce))
+			goto out;
+		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -4411,6 +4575,14 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *
 			goto fail_mmu_destroy;
 	}
 
+	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+				       GFP_KERNEL);
+	if (!vcpu->arch.mce_banks) {
+		r = -ENOMEM;
+		goto fail_mmu_destroy;
+	}
+	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+
 	return 0;
 
 fail_mmu_destroy:
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -371,6 +371,11 @@  struct kvm_vcpu_arch {
 	unsigned long dr6;
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
+
+	u64 mcg_cap;
+	u64 mcg_status;
+	u64 mcg_ctl;
+	u64 *mce_banks;
 };
 
 struct kvm_mem_alias {
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,9 @@  struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#ifdef __KVM_HAVE_MCE
+#define KVM_CAP_MCE 31
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -454,6 +457,20 @@  struct kvm_irq_routing {
 
 #endif
 
+#ifdef KVM_CAP_MCE
+/* x86 MCE */
+struct kvm_x86_mce {
+	__u64 status;
+	__u64 addr;
+	__u64 misc;
+	__u64 mcg_status;
+	__u8 bank;
+	__u8 pad1;
+	__u16 pad2;
+	__u32 pad3;
+};
+#endif
+
 /*
  * ioctls for VM fds
  */
@@ -541,6 +558,10 @@  struct kvm_irq_routing {
 #define KVM_NMI                   _IO(KVMIO,  0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
+/* MCE for x86 */
+#define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
+#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
+#define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
 
 /*
  * Deprecated interfaces
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -17,6 +17,7 @@ 
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
+#define __KVM_HAVE_MCE
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256