KVM: VMX: enable LBR virtualization
diff mbox

Message ID 1444471906-8496-1-git-send-email-jianjay.zhou@huawei.com
State New
Headers show

Commit Message

Jay Zhou Oct. 10, 2015, 10:11 a.m. UTC
Use the VMX MSR store/load mechanism and the MSR intercept bitmap
to implement LBR virtualization.

Signed-off-by: Jian Zhou  <jianjay.zhou@huawei.com>
Signed-off-by: Stephen He <herongguang.he@huawei.com>

--
1.7.12.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jay Zhou Oct. 12, 2015, 12:10 p.m. UTC | #1
ping...

> Using vmx msr store/load mechanism and msr intercept bitmap
> to implement LBR virtualization.
> 
> Signed-off-by: Jian Zhou  <jianjay.zhou@huawei.com>
> Signed-off-by: Stephen He <herongguang.he@huawei.com>
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2beee03..244f68c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -887,6 +887,12 @@ struct kvm_x86_ops {
>   					   gfn_t offset, unsigned long mask);
>   	/* pmu operations of sub-arch */
>   	const struct kvm_pmu_ops *pmu_ops;
> +
> +	void (*vmcs_write64)(unsigned long field, u64 value);
> +	u64 (*vmcs_read64)(unsigned long field);
> +
> +	int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
> +	void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);
>   };
> 
>   struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 06ef490..2305308 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -159,7 +159,7 @@ module_param(ple_window_max, int, S_IRUGO);
> 
>   extern const ulong vmx_return;
> 
> -#define NR_AUTOLOAD_MSRS 8
> +#define NR_AUTOLOAD_MSRS 256
>   #define VMCS02_POOL_SIZE 1
> 
>   struct vmcs {
> @@ -1630,6 +1630,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
>   	--m->nr;
>   	m->guest[i] = m->guest[m->nr];
>   	m->host[i] = m->host[m->nr];
> +	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
>   	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
>   	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
>   }
> @@ -1645,7 +1646,7 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
>   	vm_exit_controls_setbit(vmx, exit);
>   }
> 
> -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> +static int add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   				  u64 guest_val, u64 host_val)
>   {
>   	unsigned i;
> @@ -1660,7 +1661,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   					GUEST_IA32_EFER,
>   					HOST_IA32_EFER,
>   					guest_val, host_val);
> -			return;
> +			return 0;
>   		}
>   		break;
>   	case MSR_CORE_PERF_GLOBAL_CTRL:
> @@ -1671,7 +1672,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   					GUEST_IA32_PERF_GLOBAL_CTRL,
>   					HOST_IA32_PERF_GLOBAL_CTRL,
>   					guest_val, host_val);
> -			return;
> +			return 0;
>   		}
>   		break;
>   	}
> @@ -1683,9 +1684,10 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   	if (i == NR_AUTOLOAD_MSRS) {
>   		printk_once(KERN_WARNING "Not enough msr switch entries. "
>   				"Can't add msr %x\n", msr);
> -		return;
> +		return -ENOSPC;
>   	} else if (i == m->nr) {
>   		++m->nr;
> +		vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
>   		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
>   		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
>   	}
> @@ -1694,6 +1696,15 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>   	m->guest[i].value = guest_val;
>   	m->host[i].index = msr;
>   	m->host[i].value = host_val;
> +
> +	return 0;
> +}
> +
> +static int vmx_add_atomic_switch_msr(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	return add_atomic_switch_msr(vmx, msr, guest_val, host_val);
>   }
> 
>   static void reload_tss(void)
> @@ -4332,6 +4343,20 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
>   			msr, MSR_TYPE_W);
>   }
> 
> +static void vmx_disable_intercept_guest_msr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +	if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
> +		vmx_disable_intercept_msr_read_x2apic(msr);
> +		vmx_disable_intercept_msr_write_x2apic(msr);
> +	}
> +	else {
> +		if (is_long_mode(vcpu))
> +			vmx_disable_intercept_for_msr(msr, true);
> +		else
> +			vmx_disable_intercept_for_msr(msr, false);
> +	}
> +}
> +
>   static int vmx_vm_has_apicv(struct kvm *kvm)
>   {
>   	return enable_apicv && irqchip_in_kernel(kvm);
> @@ -4654,6 +4679,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>   #endif
> 
>   	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
> +	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
>   	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
>   	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
>   	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
> @@ -10409,6 +10435,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
>   	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> 
>   	.pmu_ops = &intel_pmu_ops,
> +
> +	.vmcs_write64 = vmcs_write64,
> +	.vmcs_read64 = vmcs_read64,
> +
> +	.add_atomic_switch_msr = vmx_add_atomic_switch_msr,
> +	.disable_intercept_guest_msr = vmx_disable_intercept_guest_msr,
>   };
> 
>   static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 92511d4..f1fcd7c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -176,6 +176,113 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> 
>   u64 __read_mostly host_xcr0;
> 
> +/* Netburst (P4) last-branch recording */
> +#define MSR_P4_LER_FROM_LIP 		0x000001d7
> +#define MSR_P4_LER_TO_LIP 		0x000001d8
> +#define MSR_P4_LASTBRANCH_TOS		0x000001da
> +#define MSR_P4_LASTBRANCH_0		0x000001db
> +#define NUM_MSR_P4_LASTBRANCH		4
> +#define MSR_P4_LASTBRANCH_0_FROM_LIP	0x00000680
> +#define MSR_P4_LASTBRANCH_0_TO_LIP	0x000006c0
> +#define NUM_MSR_P4_LASTBRANCH_FROM_TO	16
> +
> +/* Pentium M (and Core) last-branch recording */
> +#define MSR_PM_LASTBRANCH_TOS		0x000001c9
> +#define MSR_PM_LASTBRANCH_0		0x00000040
> +#define NUM_MSR_PM_LASTBRANCH		8
> +
> +/* Core 2 and Atom last-branch recording */
> +#define MSR_C2_LASTBRANCH_TOS		0x000001c9
> +#define MSR_C2_LASTBRANCH_0_FROM_IP	0x00000040
> +#define MSR_C2_LASTBRANCH_0_TO_IP	0x00000060
> +#define NUM_MSR_C2_LASTBRANCH_FROM_TO	4
> +#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO	8
> +
> +struct lbr_info {
> +	u32 base, count;
> +} p4_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_P4_LER_FROM_LIP,          1 },
> +	{ MSR_P4_LER_TO_LIP,            1 },
> +	{ MSR_P4_LASTBRANCH_TOS,        1 },
> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +}, c2_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_IA32_LASTINTFROMIP,       1 },
> +	{ MSR_IA32_LASTINTTOIP,         1 },
> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +}, nh_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_IA32_LASTINTFROMIP,       1 },
> +	{ MSR_IA32_LASTINTTOIP,         1 },
> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +}, at_lbr[] = {
> +	{ MSR_LBR_SELECT,               1 },
> +	{ MSR_IA32_LASTINTFROMIP,       1 },
> +	{ MSR_IA32_LASTINTTOIP,         1 },
> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
> +	{ 0, 0 }
> +};
> +
> +static const struct lbr_info *last_branch_msr_get(void)
> +{
> +	switch ( boot_cpu_data.x86 )
> +	{
> +		case 6:
> +			switch ( boot_cpu_data.x86_model )
> +			{
> +				/* Core2 Duo */
> +				case 15:
> +				/* Enhanced Core */
> +				case 23:
> +					return c2_lbr;
> +					break;
> +				/* Nehalem */
> +				case 26: case 30: case 31: case 46:
> +				/* Westmere */
> +				case 37: case 44: case 47:
> +				/* Sandy Bridge */
> +				case 42: case 45:
> +				/* Ivy Bridge */
> +				case 58: case 62:
> +				/* Haswell */
> +				case 60: case 63: case 69: case 70:
> +				/* future */
> +				case 61: case 78:
> +					return nh_lbr;
> +					break;
> +				/* Atom */
> +				case 28: case 38: case 39: case 53: case 54:
> +				/* Silvermont */
> +				case 55: case 74: case 77: case 90: case 93:
> +					return at_lbr;
> +					break;
> +			}
> +			break;
> +		case 15:
> +			switch ( boot_cpu_data.x86_model )
> +			{
> +				/* Pentium4/Xeon with em64t */
> +				case 3: case 4: case 6:
> +					return p4_lbr;
> +					break;
> +			}
> +			break;
> +	}
> +
> +	return NULL;
> +}
> +
>   static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
> 
>   static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
> @@ -1917,6 +2024,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   	bool pr = false;
>   	u32 msr = msr_info->index;
>   	u64 data = msr_info->data;
> +	u64 supported = 0;
> +	static const struct lbr_info *lbr = NULL;
> +	int i = 0;
> +	int value = 0;
> 
>   	switch (msr) {
>   	case MSR_AMD64_NB_CFG:
> @@ -1948,16 +2059,34 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		}
>   		break;
>   	case MSR_IA32_DEBUGCTLMSR:
> -		if (!data) {
> -			/* We support the non-activated case already */
> -			break;
> -		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
> -			/* Values other than LBR and BTF are vendor-specific,
> -			   thus reserved and should throw a #GP */
> +		supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
> +
> +		if (data & ~supported) {
> +			/* Values other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported,
> +			 * thus reserved and should throw a #GP */
> +			vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
>   			return 1;
>   		}
> -		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
> -			    __func__, data);
> +
> +		if (data & DEBUGCTLMSR_LBR) {
> +			lbr = last_branch_msr_get();
> +			if (lbr == NULL)
> +				break;
> +
> +			for (; (value == 0) && lbr->count; lbr++)
> +				for (i = 0; (value == 0) && (i < lbr->count); i++)
> +					if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
> +						kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
> +		}
> +
> +		if (value == 0) {
> +			kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> +		}
> +		else {
> +			/* throw a #GP */
> +			return 1;
> +		}
> +
>   		break;
>   	case 0x200 ... 0x2ff:
>   		return kvm_mtrr_set_msr(vcpu, msr, data);
> @@ -2178,9 +2307,11 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>   int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   {
>   	switch (msr_info->index) {
> +	case MSR_IA32_DEBUGCTLMSR:
> +		msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
> +		break;
>   	case MSR_IA32_PLATFORM_ID:
>   	case MSR_IA32_EBL_CR_POWERON:
> -	case MSR_IA32_DEBUGCTLMSR:
>   	case MSR_IA32_LASTBRANCHFROMIP:
>   	case MSR_IA32_LASTBRANCHTOIP:
>   	case MSR_IA32_LASTINTFROMIP:
> --
> 1.7.12.4
> 
> 
> 
> .
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Oct. 12, 2015, 12:44 p.m. UTC | #2
On 12/10/2015 14:10, Jian Zhou wrote:
> ping...

I think your expectations for review RTT are a bit too optimistic.
I have only worked 4 hours since you posted the patch...  But it was on
my list anyway, so let's do it.

First of all, you should move the implementation entirely into vmx.c,
because these new hooks are not acceptable:

>> +	void (*vmcs_write64)(unsigned long field, u64 value);
>> +	u64 (*vmcs_read64)(unsigned long field);
>> +
>> +	int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
>> +	void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);

x86.c must not have any knowledge of VMX internals such as the VMCS.
Also, AMD have their own implementation of LBR virtualization.

In addition, the MSR numbers may differ between the guest and the host,
because it is possible to emulate e.g. a Core CPU on a Core 2 CPU.  So I
recommend against using the atomic switch mechanism for the from/to MSRs.

Instead, if GUEST_DEBUGCTL.LBR = 1 you can read the from/to MSRs into an
array stored in struct kvm_arch_vcpu at vmexit time.  Reading the
from/to MSRs should cause a vmexit in the first implementation.  Any
optimization of this can be done separately.

As a benefit, this will force you to implement a mechanism for passing
the contents of the MSRs to userspace and read them back.  This is
necessary for debugging and for migration.  You will also have to
implement support for the feature in QEMU in order to support migration
of virtual machines that use LBRs.

>> +/* Core 2 and Atom last-branch recording */
>> +#define MSR_C2_LASTBRANCH_TOS		0x000001c9
>> +#define MSR_C2_LASTBRANCH_0_FROM_IP	0x00000040
>> +#define MSR_C2_LASTBRANCH_0_TO_IP	0x00000060
>> +#define NUM_MSR_C2_LASTBRANCH_FROM_TO	4
>> +#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO	8
>> +
>> +struct lbr_info {
>> +	u32 base, count;
>> +} p4_lbr[] = {
>> +	{ MSR_LBR_SELECT,               1 },
>> +	{ MSR_P4_LER_FROM_LIP,          1 },
>> +	{ MSR_P4_LER_TO_LIP,            1 },
>> +	{ MSR_P4_LASTBRANCH_TOS,        1 },
>> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> +	{ 0, 0 }
>> +}, c2_lbr[] = {
>> +	{ MSR_LBR_SELECT,               1 },
>> +	{ MSR_IA32_LASTINTFROMIP,       1 },
>> +	{ MSR_IA32_LASTINTTOIP,         1 },
>> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
>> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
>> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
>> +	{ 0, 0 }
>> +}, nh_lbr[] = {
>> +	{ MSR_LBR_SELECT,               1 },
>> +	{ MSR_IA32_LASTINTFROMIP,       1 },
>> +	{ MSR_IA32_LASTINTTOIP,         1 },
>> +	{ MSR_C2_LASTBRANCH_TOS,        1 },

Nehalem has 16 LBR records, not 4.  I suggest that you reorganize the
tables so that it is easy to match them against tables in the Intel SDM.

Note that you have to compute the number of records according to the
_guest_ CPUID, not the host CPUID.  For simplicity I suggest that you
only enable LBR virtualization if the host machine has 16 LBR entries,
and make it possible to disable it through a kvm_intel module parameter.

Thanks,

Paolo

>> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
>> +	{ 0, 0 }
>> +}, at_lbr[] = {
>> +	{ MSR_LBR_SELECT,               1 },
>> +	{ MSR_IA32_LASTINTFROMIP,       1 },
>> +	{ MSR_IA32_LASTINTTOIP,         1 },
>> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
>> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
>> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
>> +	{ 0, 0 }
>> +};
>> +
>> +static const struct lbr_info *last_branch_msr_get(void)
>> +{
>> +	switch ( boot_cpu_data.x86 )
>> +	{
>> +		case 6:
>> +			switch ( boot_cpu_data.x86_model )
>> +			{
>> +				/* Core2 Duo */
>> +				case 15:
>> +				/* Enhanced Core */
>> +				case 23:
>> +					return c2_lbr;
>> +					break;
>> +				/* Nehalem */
>> +				case 26: case 30: case 31: case 46:
>> +				/* Westmere */
>> +				case 37: case 44: case 47:
>> +				/* Sandy Bridge */
>> +				case 42: case 45:
>> +				/* Ivy Bridge */
>> +				case 58: case 62:
>> +				/* Haswell */
>> +				case 60: case 63: case 69: case 70:
>> +				/* future */
>> +				case 61: case 78:
>> +					return nh_lbr;
>> +					break;
>> +				/* Atom */
>> +				case 28: case 38: case 39: case 53: case 54:
>> +				/* Silvermont */
>> +				case 55: case 74: case 77: case 90: case 93:
>> +					return at_lbr;
>> +					break;
>> +			}
>> +			break;
>> +		case 15:
>> +			switch ( boot_cpu_data.x86_model )
>> +			{
>> +				/* Pentium4/Xeon with em64t */
>> +				case 3: case 4: case 6:
>> +					return p4_lbr;
>> +					break;
>> +			}
>> +			break;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>>   static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
>>
>>   static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
>> @@ -1917,6 +2024,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   	bool pr = false;
>>   	u32 msr = msr_info->index;
>>   	u64 data = msr_info->data;
>> +	u64 supported = 0;
>> +	static const struct lbr_info *lbr = NULL;
>> +	int i = 0;
>> +	int value = 0;
>>
>>   	switch (msr) {
>>   	case MSR_AMD64_NB_CFG:
>> @@ -1948,16 +2059,34 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		}
>>   		break;
>>   	case MSR_IA32_DEBUGCTLMSR:
>> -		if (!data) {
>> -			/* We support the non-activated case already */
>> -			break;
>> -		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>> -			/* Values other than LBR and BTF are vendor-specific,
>> -			   thus reserved and should throw a #GP */
>> +		supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
>> +
>> +		if (data & ~supported) {
>> +			/* Values other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported,
>> +			 * thus reserved and should throw a #GP */
>> +			vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
>>   			return 1;
>>   		}
>> -		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
>> -			    __func__, data);
>> +
>> +		if (data & DEBUGCTLMSR_LBR) {
>> +			lbr = last_branch_msr_get();
>> +			if (lbr == NULL)
>> +				break;
>> +
>> +			for (; (value == 0) && lbr->count; lbr++)
>> +				for (i = 0; (value == 0) && (i < lbr->count); i++)
>> +					if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
>> +						kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
>> +		}
>> +
>> +		if (value == 0) {
>> +			kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>> +		}
>> +		else {
>> +			/* throw a #GP */
>> +			return 1;
>> +		}
>> +
>>   		break;
>>   	case 0x200 ... 0x2ff:
>>   		return kvm_mtrr_set_msr(vcpu, msr, data);
>> @@ -2178,9 +2307,11 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>   int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   {
>>   	switch (msr_info->index) {
>> +	case MSR_IA32_DEBUGCTLMSR:
>> +		msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
>> +		break;
>>   	case MSR_IA32_PLATFORM_ID:
>>   	case MSR_IA32_EBL_CR_POWERON:
>> -	case MSR_IA32_DEBUGCTLMSR:
>>   	case MSR_IA32_LASTBRANCHFROMIP:
>>   	case MSR_IA32_LASTBRANCHTOIP:
>>   	case MSR_IA32_LASTINTFROMIP:
>> --
>> 1.7.12.4
>>
>>
>>
>> .
>>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Zhou Oct. 14, 2015, 11:26 a.m. UTC | #3
On 12/10/2015 20:44, Paolo Bonzini wrote:
>
>
> On 12/10/2015 14:10, Jian Zhou wrote:
>> ping...
>
> I think your expectations for review RTT are a bit too optimistic.
> I have only worked 4 hours since you posted the patch...  But it was on
> my list anyway, so let's do it.

   Thanks to Paolo for taking the time to review, and for the valuable comments. :)

> First of all, you should move the implementation entirely into vmx.c,
> because these new hooks are not acceptable:
>
>>> +	void (*vmcs_write64)(unsigned long field, u64 value);
>>> +	u64 (*vmcs_read64)(unsigned long field);
>>> +
>>> +	int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
>>> +	void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);
>
> x86.c must not have any knowledge of VMX internals such as the VMCS.
> Also, AMD have their own implementation of LBR virtualization.

   ok. These hooks will be modified in the next patch.

> In addition, the MSR numbers may differ between the guest and the host,
> because it is possible to emulate e.g. a Core CPU on a Core 2 CPU.  So I
> recommend against using the atomic switch mechanism for the from/to MSRs.

   The vLBR feature depends on vPMU, and to enable vPMU, the "cpu mode"
   in the guest XML needs to be specified as host-passthrough. I think
   the MSR numbers are the same in the guest and the host in this
   scenario.

> Instead, if GUEST_DEBUGCTL.LBR = 1 you can read the from/to MSRs into an
> array stored in struct kvm_arch_vcpu at vmexit time.  Reading the
> from/to MSRs should cause a vmexit in the first implementation.  Any
> optimization of this can be done separately.

   ok. I will compare this method to atomic switch mechanism.

> As a benefit, this will force you to implement a mechanism for passing
> the contents of the MSRs to userspace and read them back.  This is
> necessary for debugging and for migration.  You will also have to
> implement support for the feature in QEMU in order to support migration
> of virtual machines that use LBRs.

   ok. Migration will be supported in the next patch.

>>> +/* Core 2 and Atom last-branch recording */
>>> +#define MSR_C2_LASTBRANCH_TOS		0x000001c9
>>> +#define MSR_C2_LASTBRANCH_0_FROM_IP	0x00000040
>>> +#define MSR_C2_LASTBRANCH_0_TO_IP	0x00000060
>>> +#define NUM_MSR_C2_LASTBRANCH_FROM_TO	4
>>> +#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO	8
>>> +
>>> +struct lbr_info {
>>> +	u32 base, count;
>>> +} p4_lbr[] = {
>>> +	{ MSR_LBR_SELECT,               1 },
>>> +	{ MSR_P4_LER_FROM_LIP,          1 },
>>> +	{ MSR_P4_LER_TO_LIP,            1 },
>>> +	{ MSR_P4_LASTBRANCH_TOS,        1 },
>>> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>>> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
>>> +	{ 0, 0 }
>>> +}, c2_lbr[] = {
>>> +	{ MSR_LBR_SELECT,               1 },
>>> +	{ MSR_IA32_LASTINTFROMIP,       1 },
>>> +	{ MSR_IA32_LASTINTTOIP,         1 },
>>> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
>>> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
>>> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
>>> +	{ 0, 0 }
>>> +}, nh_lbr[] = {
>>> +	{ MSR_LBR_SELECT,               1 },
>>> +	{ MSR_IA32_LASTINTFROMIP,       1 },
>>> +	{ MSR_IA32_LASTINTTOIP,         1 },
>>> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
>
> Nehalem has 16 LBR records, not 4.  I suggest that you reorganize the
> tables so that it is easy to match them against tables in the Intel SDM.
> Note that you have to compute the number of records according to the
> _guest_ CPUID, not the host CPUID.  For simplicity I suggest that you
> only enable LBR virtualization if the host machine has 16 LBR entries,

   ok. The table will be reorganized in the next patch.

> and make it possible to disable it through a kvm_intel module parameter.

   A kvm_intel module parameter will be added to permanently disable
   LBR virtualization in the next patch.

   Thanks and regards,

   Jian

> Thanks,
>
> Paolo
>
>>> +	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
>>> +	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
>>> +	{ 0, 0 }
>>> +}, at_lbr[] = {
>>> +	{ MSR_LBR_SELECT,               1 },
>>> +	{ MSR_IA32_LASTINTFROMIP,       1 },
>>> +	{ MSR_IA32_LASTINTTOIP,         1 },
>>> +	{ MSR_C2_LASTBRANCH_TOS,        1 },
>>> +	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
>>> +	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
>>> +	{ 0, 0 }
>>> +};
>>> +
>>> +static const struct lbr_info *last_branch_msr_get(void)
>>> +{
>>> +	switch ( boot_cpu_data.x86 )
>>> +	{
>>> +		case 6:
>>> +			switch ( boot_cpu_data.x86_model )
>>> +			{
>>> +				/* Core2 Duo */
>>> +				case 15:
>>> +				/* Enhanced Core */
>>> +				case 23:
>>> +					return c2_lbr;
>>> +					break;
>>> +				/* Nehalem */
>>> +				case 26: case 30: case 31: case 46:
>>> +				/* Westmere */
>>> +				case 37: case 44: case 47:
>>> +				/* Sandy Bridge */
>>> +				case 42: case 45:
>>> +				/* Ivy Bridge */
>>> +				case 58: case 62:
>>> +				/* Haswell */
>>> +				case 60: case 63: case 69: case 70:
>>> +				/* future */
>>> +				case 61: case 78:
>>> +					return nh_lbr;
>>> +					break;
>>> +				/* Atom */
>>> +				case 28: case 38: case 39: case 53: case 54:
>>> +				/* Silvermont */
>>> +				case 55: case 74: case 77: case 90: case 93:
>>> +					return at_lbr;
>>> +					break;
>>> +			}
>>> +			break;
>>> +		case 15:
>>> +			switch ( boot_cpu_data.x86_model )
>>> +			{
>>> +				/* Pentium4/Xeon with em64t */
>>> +				case 3: case 4: case 6:
>>> +					return p4_lbr;
>>> +					break;
>>> +			}
>>> +			break;
>>> +	}
>>> +
>>> +	return NULL;
>>> +}
>>> +
>>>    static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
>>>
>>>    static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
>>> @@ -1917,6 +2024,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>    	bool pr = false;
>>>    	u32 msr = msr_info->index;
>>>    	u64 data = msr_info->data;
>>> +	u64 supported = 0;
>>> +	static const struct lbr_info *lbr = NULL;
>>> +	int i = 0;
>>> +	int value = 0;
>>>
>>>    	switch (msr) {
>>>    	case MSR_AMD64_NB_CFG:
>>> @@ -1948,16 +2059,34 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>    		}
>>>    		break;
>>>    	case MSR_IA32_DEBUGCTLMSR:
>>> -		if (!data) {
>>> -			/* We support the non-activated case already */
>>> -			break;
>>> -		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>>> -			/* Values other than LBR and BTF are vendor-specific,
>>> -			   thus reserved and should throw a #GP */
>>> +		supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
>>> +
>>> +		if (data & ~supported) {
>>> +			/* Values other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported,
>>> +			 * thus reserved and should throw a #GP */
>>> +			vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
>>>    			return 1;
>>>    		}
>>> -		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
>>> -			    __func__, data);
>>> +
>>> +		if (data & DEBUGCTLMSR_LBR) {
>>> +			lbr = last_branch_msr_get();
>>> +			if (lbr == NULL)
>>> +				break;
>>> +
>>> +			for (; (value == 0) && lbr->count; lbr++)
>>> +				for (i = 0; (value == 0) && (i < lbr->count); i++)
>>> +					if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
>>> +						kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
>>> +		}
>>> +
>>> +		if (value == 0) {
>>> +			kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>>> +		}
>>> +		else {
>>> +			/* throw a #GP */
>>> +			return 1;
>>> +		}
>>> +
>>>    		break;
>>>    	case 0x200 ... 0x2ff:
>>>    		return kvm_mtrr_set_msr(vcpu, msr, data);
>>> @@ -2178,9 +2307,11 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>>    int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>    {
>>>    	switch (msr_info->index) {
>>> +	case MSR_IA32_DEBUGCTLMSR:
>>> +		msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
>>> +		break;
>>>    	case MSR_IA32_PLATFORM_ID:
>>>    	case MSR_IA32_EBL_CR_POWERON:
>>> -	case MSR_IA32_DEBUGCTLMSR:
>>>    	case MSR_IA32_LASTBRANCHFROMIP:
>>>    	case MSR_IA32_LASTBRANCHTOIP:
>>>    	case MSR_IA32_LASTINTFROMIP:
>>> --
>>> 1.7.12.4
>>>
>>>
>>>
>>> .
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>
> .
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Oct. 14, 2015, 11:30 a.m. UTC | #4
On 14/10/2015 13:26, Jian Zhou wrote:
> On 12/10/2015 20:44, Paolo Bonzini wrote:
>> In addition, the MSR numbers may differ between the guest and the host,
>> because it is possible to emulate e.g. a Core CPU on a Core 2 CPU.  So I
>> recommend against using the atomic switch mechanism for the from/to MSRs.
> 
>   The vLBR feature depends on vPMU, and to enable vPMU, it needs to
>   specify the "cpu mode" in the guest XML as host-passthrough. I think
>   the MSR numbers between the guest and the host are the same in this
>   senario.

Does it depend on vPMU _for Linux guests_ or in general?  My impression
is that LBR can be used by the guest independent of the PMU.  You should
also write a unit test for kvm-unit-tests to test the behavior of your
implementation.

Thanks,

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Zhou Oct. 15, 2015, 1:51 p.m. UTC | #5
On 2015/10/14 19:30, Paolo Bonzini wrote:
>
>
> On 14/10/2015 13:26, Jian Zhou wrote:
>> On 12/10/2015 20:44, Paolo Bonzini wrote:
>>> In addition, the MSR numbers may differ between the guest and the host,
>>> because it is possible to emulate e.g. a Core CPU on a Core 2 CPU.  So I
>>> recommend against using the atomic switch mechanism for the from/to MSRs.
>>
>>    The vLBR feature depends on vPMU, and to enable vPMU, it needs to
>>    specify the "cpu mode" in the guest XML as host-passthrough. I think
>>    the MSR numbers between the guest and the host are the same in this
>>    senario.
>
> Does it depend on vPMU _for Linux guests_ or in general?  My impression
> is that LBR can be used by the guest independent of the PMU.

   I think only for Linux guests.

   I searched for how to enable LBR on non-Linux guests, e.g. Windows,
   and found no developer manuals about it.

   Here is an article about it:
   http://www.codeproject.com/Articles/517466/Last-branch-records-and-branch-tracing
   it says:
   "bit 8 of DR7 represents bit 0 of DebugCtl. This is the LBR bit."

   The Intel developer manual, vol. 3B, describes DR7 (Debug Control
   Register) and its bit 8 in Section 17.2.4:
   "LE and GE (local and global exact breakpoint enable) flags (bits 8,
   9) — When set, these flags cause the processor to detect the exact
   instruction that caused a data breakpoint condition. For backward and
   forward compatibility with other Intel processors, we recommend that
   the LE and GE flags be set to 1 if exact breakpoints are required."

   But for now, I don't know how to test bit 8 of DR7 on Windows.

   Regards,

   Jian

>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Oct. 15, 2015, 3:03 p.m. UTC | #6
On 15/10/2015 15:51, Jian Zhou wrote:
> 
> 
> On 2015/10/14 19:30, Paolo Bonzini wrote:
>>
>>
>> On 14/10/2015 13:26, Jian Zhou wrote:
>>> On 12/10/2015 20:44, Paolo Bonzini wrote:
>>>> In addition, the MSR numbers may differ between the guest and the host,
>>>> because it is possible to emulate e.g. a Core CPU on a Core 2 CPU. 
>>>> So I
>>>> recommend against using the atomic switch mechanism for the from/to
>>>> MSRs.
>>>
>>>    The vLBR feature depends on vPMU, and to enable vPMU, it needs to
>>>    specify the "cpu mode" in the guest XML as host-passthrough. I think
>>>    the MSR numbers between the guest and the host are the same in this
>>>    senario.
>>
>> Does it depend on vPMU _for Linux guests_ or in general?  My impression
>> is that LBR can be used by the guest independent of the PMU.
> 
>   I think only for Linux guests.
> 
>   I googled how to enable LBR on other guests(except Linux guests),
>   e.g. Windows, and got no developer manuals about it.
> 
>   Here is an article about it:
>   http://www.codeproject.com/Articles/517466/Last-branch-records-
>   and-branch-tracing
>   it says:
>   "bit 8 of DR7 represents bit 0 of DebugCtl. This is the LBR bit."

Don't worry about the operating system in the guest: you are just
emulating a processor feature, you do not care about anything except
what is written in the Intel SDM.

You can use kvm-unit-tests
(https://git.kernel.org/cgit/virt/kvm/kvm-unit-tests.git/) to write a
test for your feature.  There are existing tests for debugging features.

Thanks,

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Zhou Oct. 16, 2015, 12:54 a.m. UTC | #7
>>> Does it depend on vPMU _for Linux guests_ or in general?  My impression
>>> is that LBR can be used by the guest independent of the PMU.
>>
>>    I think only for Linux guests.
>>
>>    I googled how to enable LBR on other guests(except Linux guests),
>>    e.g. Windows, and got no developer manuals about it.
>>
>>    Here is an article about it:
>>    http://www.codeproject.com/Articles/517466/Last-branch-records-
>>    and-branch-tracing
>>    it says:
>>    "bit 8 of DR7 represents bit 0 of DebugCtl. This is the LBR bit."
>
> Don't worry about the operating system in the guest: you are just
> emulating a processor feature, you do not care about anything except
> what is written in the Intel SDM.
>
> You can use kvm-unit-tests
> (https://git.kernel.org/cgit/virt/kvm/kvm-unit-tests.git/) to write a
> test for your feature.  There are existing tests for debugging features.

   ok.

   Regards,

   Jian

>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2beee03..244f68c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -887,6 +887,12 @@  struct kvm_x86_ops {
 					   gfn_t offset, unsigned long mask);
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
+
+	void (*vmcs_write64)(unsigned long field, u64 value);
+	u64 (*vmcs_read64)(unsigned long field);
+
+	int (*add_atomic_switch_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val);
+	void (*disable_intercept_guest_msr)(struct kvm_vcpu *vcpu, u32 msr);
 };

 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06ef490..2305308 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -159,7 +159,7 @@  module_param(ple_window_max, int, S_IRUGO);

 extern const ulong vmx_return;

-#define NR_AUTOLOAD_MSRS 8
+#define NR_AUTOLOAD_MSRS 256
 #define VMCS02_POOL_SIZE 1

 struct vmcs {
@@ -1630,6 +1630,7 @@  static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	--m->nr;
 	m->guest[i] = m->guest[m->nr];
 	m->host[i] = m->host[m->nr];
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
 }
@@ -1645,7 +1646,7 @@  static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 	vm_exit_controls_setbit(vmx, exit);
 }

-static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
+static int add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 				  u64 guest_val, u64 host_val)
 {
 	unsigned i;
@@ -1660,7 +1661,7 @@  static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 					GUEST_IA32_EFER,
 					HOST_IA32_EFER,
 					guest_val, host_val);
-			return;
+			return 0;
 		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
@@ -1671,7 +1672,7 @@  static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 					GUEST_IA32_PERF_GLOBAL_CTRL,
 					HOST_IA32_PERF_GLOBAL_CTRL,
 					guest_val, host_val);
-			return;
+			return 0;
 		}
 		break;
 	}
@@ -1683,9 +1684,10 @@  static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 	if (i == NR_AUTOLOAD_MSRS) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
-		return;
+		return -ENOSPC;
 	} else if (i == m->nr) {
 		++m->nr;
+		vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
 	}
@@ -1694,6 +1696,15 @@  static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 	m->guest[i].value = guest_val;
 	m->host[i].index = msr;
 	m->host[i].value = host_val;
+
+	return 0;
+}
+
+static int vmx_add_atomic_switch_msr(struct kvm_vcpu *vcpu, u32 msr, u64 guest_val, u64 host_val)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	return add_atomic_switch_msr(vmx, msr, guest_val, host_val);
 }

 static void reload_tss(void)
@@ -4332,6 +4343,20 @@  static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 			msr, MSR_TYPE_W);
 }

+static void vmx_disable_intercept_guest_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+		vmx_disable_intercept_msr_read_x2apic(msr);
+		vmx_disable_intercept_msr_write_x2apic(msr);
+	}
+	else {
+		if (is_long_mode(vcpu))
+			vmx_disable_intercept_for_msr(msr, true);
+		else
+			vmx_disable_intercept_for_msr(msr, false);
+	}
+}
+
 static int vmx_vm_has_apicv(struct kvm *kvm)
 {
 	return enable_apicv && irqchip_in_kernel(kvm);
@@ -4654,6 +4679,7 @@  static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 #endif

 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
@@ -10409,6 +10435,12 @@  static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,

 	.pmu_ops = &intel_pmu_ops,
+
+	.vmcs_write64 = vmcs_write64,
+	.vmcs_read64 = vmcs_read64,
+
+	.add_atomic_switch_msr = vmx_add_atomic_switch_msr,
+	.disable_intercept_guest_msr = vmx_disable_intercept_guest_msr,
 };

 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92511d4..f1fcd7c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -176,6 +176,113 @@  struct kvm_stats_debugfs_item debugfs_entries[] = {

 u64 __read_mostly host_xcr0;

+/* Netburst (P4) last-branch recording */
+#define MSR_P4_LER_FROM_LIP 		0x000001d7
+#define MSR_P4_LER_TO_LIP 		0x000001d8
+#define MSR_P4_LASTBRANCH_TOS		0x000001da
+#define MSR_P4_LASTBRANCH_0		0x000001db
+#define NUM_MSR_P4_LASTBRANCH		4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP	0x00000680
+#define MSR_P4_LASTBRANCH_0_TO_LIP	0x000006c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO	16
+
+/* Pentium M (and Core) last-branch recording */
+#define MSR_PM_LASTBRANCH_TOS		0x000001c9
+#define MSR_PM_LASTBRANCH_0		0x00000040
+#define NUM_MSR_PM_LASTBRANCH		8
+
+/* Core 2 and Atom last-branch recording */
+#define MSR_C2_LASTBRANCH_TOS		0x000001c9
+#define MSR_C2_LASTBRANCH_0_FROM_IP	0x00000040
+#define MSR_C2_LASTBRANCH_0_TO_IP	0x00000060
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO	4
+#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO	8
+
+struct lbr_info {
+	u32 base, count;
+} p4_lbr[] = {
+	{ MSR_LBR_SELECT,               1 },
+	{ MSR_P4_LER_FROM_LIP,          1 },
+	{ MSR_P4_LER_TO_LIP,            1 },
+	{ MSR_P4_LASTBRANCH_TOS,        1 },
+	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
+	{ 0, 0 }
+}, c2_lbr[] = {
+	{ MSR_LBR_SELECT,               1 },
+	{ MSR_IA32_LASTINTFROMIP,       1 },
+	{ MSR_IA32_LASTINTTOIP,         1 },
+	{ MSR_C2_LASTBRANCH_TOS,        1 },
+	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
+	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
+	{ 0, 0 }
+}, nh_lbr[] = {
+	{ MSR_LBR_SELECT,               1 },
+	{ MSR_IA32_LASTINTFROMIP,       1 },
+	{ MSR_IA32_LASTINTTOIP,         1 },
+	{ MSR_C2_LASTBRANCH_TOS,        1 },
+	{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+	{ MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
+	{ 0, 0 }
+}, at_lbr[] = {
+	{ MSR_LBR_SELECT,               1 },
+	{ MSR_IA32_LASTINTFROMIP,       1 },
+	{ MSR_IA32_LASTINTTOIP,         1 },
+	{ MSR_C2_LASTBRANCH_TOS,        1 },
+	{ MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
+	{ MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
+	{ 0, 0 }
+};
+
+static const struct lbr_info *last_branch_msr_get(void)
+{
+	switch ( boot_cpu_data.x86 )
+	{
+		case 6:
+			switch ( boot_cpu_data.x86_model )
+			{
+				/* Core2 Duo */
+				case 15:
+				/* Enhanced Core */
+				case 23:
+					return c2_lbr;
+					break;
+				/* Nehalem */
+				case 26: case 30: case 31: case 46:
+				/* Westmere */
+				case 37: case 44: case 47:
+				/* Sandy Bridge */
+				case 42: case 45:
+				/* Ivy Bridge */
+				case 58: case 62:
+				/* Haswell */
+				case 60: case 63: case 69: case 70:
+				/* future */
+				case 61: case 78:
+					return nh_lbr;
+					break;
+				/* Atom */
+				case 28: case 38: case 39: case 53: case 54:
+				/* Silvermont */
+				case 55: case 74: case 77: case 90: case 93:
+					return at_lbr;
+					break;
+			}
+			break;
+		case 15:
+			switch ( boot_cpu_data.x86_model )
+			{
+				/* Pentium4/Xeon with em64t */
+				case 3: case 4: case 6:
+					return p4_lbr;
+					break;
+			}
+			break;
+	}
+
+	return NULL;
+}
+
 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
@@ -1917,6 +2024,10 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	bool pr = false;
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
+	u64 supported = 0;
+	static const struct lbr_info *lbr = NULL;
+	int i = 0;
+	int value = 0;

 	switch (msr) {
 	case MSR_AMD64_NB_CFG:
@@ -1948,16 +2059,34 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
-		if (!data) {
-			/* We support the non-activated case already */
-			break;
-		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
-			/* Values other than LBR and BTF are vendor-specific,
-			   thus reserved and should throw a #GP */
+		supported = DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+
+		if (data & ~supported) {
+			/* Values other than LBR, BTF and FREEZE_LBRS_ON_PMI are not supported,
+			 * thus reserved and should throw a #GP */
+			vcpu_unimpl(vcpu, "unsupported MSR_IA32_DEBUGCTLMSR wrmsr: 0x%llx\n", data);
 			return 1;
 		}
-		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
-			    __func__, data);
+
+		if (data & DEBUGCTLMSR_LBR) {
+			lbr = last_branch_msr_get();
+			if (lbr == NULL)
+				break;
+
+			for (; (value == 0) && lbr->count; lbr++)
+				for (i = 0; (value == 0) && (i < lbr->count); i++)
+					if ((value = kvm_x86_ops->add_atomic_switch_msr(vcpu, lbr->base + i, 0, 0)) == 0)
+						kvm_x86_ops->disable_intercept_guest_msr(vcpu, lbr->base + i);
+		}
+
+		if (value == 0) {
+			kvm_x86_ops->vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+		}
+		else {
+			/* throw a #GP */
+			return 1;
+		}
+
 		break;
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_set_msr(vcpu, msr, data);
@@ -2178,9 +2307,11 @@  static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	switch (msr_info->index) {
+	case MSR_IA32_DEBUGCTLMSR:
+		msr_info->data = kvm_x86_ops->vmcs_read64(GUEST_IA32_DEBUGCTL);
+		break;
 	case MSR_IA32_PLATFORM_ID:
 	case MSR_IA32_EBL_CR_POWERON:
-	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_LASTBRANCHFROMIP:
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP: