diff mbox

[v7,3/3] x86, apicv: add virtual x2apic support

Message ID 1355722250-7122-4-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhang, Yang Z Dec. 17, 2012, 5:30 a.m. UTC
Basically, to benefit from apicv, we need to clear the MSR bitmap for the
corresponding x2apic MSRs:
    0x800 - 0x8ff: no read intercept for apicv register virtualization
    TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery

Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
---
 arch/x86/kvm/vmx.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 55 insertions(+), 7 deletions(-)

Comments

Gleb Natapov Dec. 20, 2012, 8:31 a.m. UTC | #1
On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> basically to benefit from apicv, we need clear MSR bitmap for
> corresponding x2apic MSRs:
>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
> 
We do not set the "Virtualize x2APIC mode" bit in the secondary execution
controls. If I read the spec correctly, without that bit those MSR reads/writes
will go straight to the physical local APIC.

> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> ---
>  arch/x86/kvm/vmx.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++------
>  1 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index be66c3e..9b5e7a2 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3773,7 +3773,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
>  	spin_unlock(&vmx_vpid_lock);
>  }
>  
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
> +#define MSR_TYPE_R	1
> +#define MSR_TYPE_W	2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> +						u32 msr, int type)
>  {
>  	int f = sizeof(unsigned long);
>  
> @@ -3786,20 +3789,52 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>  	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>  	 */
>  	if (msr <= 0x1fff) {
> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
> +		if (type & MSR_TYPE_R)
> +			/* read-low */
> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-low */
> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
> +
>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>  		msr &= 0x1fff;
> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
> +		if (type & MSR_TYPE_R)
> +			/* read-high */
> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-high */
> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
>  	}
>  }
>  
>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>  {
>  	if (!longmode_only)
> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +						msr, MSR_TYPE_R | MSR_TYPE_W);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +						msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_disable_intercept_for_msr_read(u32 msr, bool longmode_only)
> +{
> +	if (!longmode_only)
> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +						msr, MSR_TYPE_R);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +					msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_for_msr_write(u32 msr, bool longmode_only)
> +{
> +	if (!longmode_only)
> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +						msr, MSR_TYPE_W);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +					msr, MSR_TYPE_W);
>  }
>  
>  /*
> @@ -7633,6 +7668,19 @@ static int __init vmx_init(void)
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
>  
> +	if (enable_apicv_reg_vid) {
> +		int msr;
> +		for (msr = 0x800; msr <= 0x8ff; msr++)
> +			vmx_disable_intercept_for_msr_read(msr, false);
> +
> +		/* TPR */
> +		vmx_disable_intercept_for_msr_write(0x808, false);
> +		/* EOI */
> +		vmx_disable_intercept_for_msr_write(0x80b, false);
> +		/* SELF-IPI */
> +		vmx_disable_intercept_for_msr_write(0x83f, false);
> +	}
> +
>  	if (enable_ept) {
>  		kvm_mmu_set_mask_ptes(0ull,
>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> -- 
> 1.7.1

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 24, 2012, 1:41 a.m. UTC | #2
Gleb Natapov wrote on 2012-12-20:
> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>> basically to benefit from apicv, we need clear MSR bitmap for
>> corresponding x2apic MSRs:
>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
> We do not set "Virtualize x2APIC mode" bit in secondary execution
> control. If I read the spec correctly without that those MSR read/writes
> will go straight to physical local APIC.
Right. For now it brings no benefit, but we may enable it in the future and then we can benefit from it.

>> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>> ---
>>  arch/x86/kvm/vmx.c |   62
>>  ++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed,
>>  55 insertions(+), 7 deletions(-)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index be66c3e..9b5e7a2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -3773,7 +3773,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
>>  	spin_unlock(&vmx_vpid_lock);
>>  }
>> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
>> u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2 +static void
>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +						u32
>> msr, int type)
>>  {
>>  	int f = sizeof(unsigned long);
>> @@ -3786,20 +3789,52 @@ static void
> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>  	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>>  	 */
>>  	if (msr <= 0x1fff) {
>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>> +		if (type & MSR_TYPE_R)
>> +			/* read-low */
>> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
>> +
>> +		if (type & MSR_TYPE_W)
>> +			/* write-low */
>> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
>> +
>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>  		msr &= 0x1fff;
>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>> +		if (type & MSR_TYPE_R)
>> +			/* read-high */
>> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
>> +
>> +		if (type & MSR_TYPE_W)
>> +			/* write-high */
>> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
>> +
>>  	}
>>  }
>>  
>>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>>  {
>>  	if (!longmode_only)
>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +						msr,
>> MSR_TYPE_R | MSR_TYPE_W);
>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +						msr,
>> MSR_TYPE_R | MSR_TYPE_W); +} + +static void
>> vmx_disable_intercept_for_msr_read(u32 msr, bool longmode_only) +{ +	if
>> (!longmode_only)
>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +						msr,
>> MSR_TYPE_R); +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +					msr, MSR_TYPE_R); +} + +static void
>> vmx_disable_intercept_for_msr_write(u32 msr, bool longmode_only) +{
>> +	if (!longmode_only)
>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +						msr,
>> MSR_TYPE_W); +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +					msr, MSR_TYPE_W);
>>  }
>>  
>>  /* @@ -7633,6 +7668,19 @@ static int __init vmx_init(void)
>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
>> +	if (enable_apicv_reg_vid) {
>> +		int msr;
>> +		for (msr = 0x800; msr <= 0x8ff; msr++)
>> +			vmx_disable_intercept_for_msr_read(msr, false);
>> +
>> +		/* TPR */
>> +		vmx_disable_intercept_for_msr_write(0x808, false);
>> +		/* EOI */
>> +		vmx_disable_intercept_for_msr_write(0x80b, false);
>> +		/* SELF-IPI */
>> +		vmx_disable_intercept_for_msr_write(0x83f, false);
>> +	}
>> +
>>  	if (enable_ept) {
>>  		kvm_mmu_set_mask_ptes(0ull,
>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>> --
>> 1.7.1
> 
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 24, 2012, 2:35 a.m. UTC | #3
Zhang, Yang Z wrote on 2012-12-24:
> Gleb Natapov wrote on 2012-12-20:
>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>>> basically to benefit from apicv, we need clear MSR bitmap for
>>> corresponding x2apic MSRs:
>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
>> We do not set "Virtualize x2APIC mode" bit in secondary execution
>> control. If I read the spec correctly without that those MSR read/writes
>> will go straight to physical local APIC.
> Right. Now it cannot get benefit, but we may enable it in future and then we can
> benefit from it.
How about adding the following check:
if (apicv_enabled && virtual_x2apic_enabled)
	clear_msr();


>>> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
>>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>>> ---
>>>  arch/x86/kvm/vmx.c |   62
>>>  ++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed,
>>>  55 insertions(+), 7 deletions(-)
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index be66c3e..9b5e7a2 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -3773,7 +3773,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
>>>  	spin_unlock(&vmx_vpid_lock);
>>>  }
>>> -static void __vmx_disable_intercept_for_msr(unsigned long
>>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2
>>> +static void __vmx_disable_intercept_for_msr(unsigned long
>>> *msr_bitmap, + 				u32 msr, int type)
>>>  {
>>>  	int f = sizeof(unsigned long);
>>> @@ -3786,20 +3789,52 @@ static void
>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>>  	 * We can control MSRs 0x00000000-0x00001fff and
>>>  0xc0000000-0xc0001fff. 	 */ 	if (msr <= 0x1fff) {
>>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>>> +		if (type & MSR_TYPE_R)
>>> +			/* read-low */
>>> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
>>> +
>>> +		if (type & MSR_TYPE_W)
>>> +			/* write-low */
>>> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
>>> +
>>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>>  		msr &= 0x1fff;
>>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>>> +		if (type & MSR_TYPE_R)
>>> +			/* read-high */
>>> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
>>> +
>>> +		if (type & MSR_TYPE_W)
>>> +			/* write-high */
>>> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
>>> +
>>>  	}
>>>  }
>>>  
>>>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>>>  {
>>>  	if (!longmode_only)
>>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + 					msr,
>>> MSR_TYPE_R | MSR_TYPE_W);
>>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, + 					msr,
>>> MSR_TYPE_R | MSR_TYPE_W); +} + +static void
>>> vmx_disable_intercept_for_msr_read(u32 msr, bool longmode_only) +{ +
>>> 	if (!longmode_only)
>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + 					msr,
>>> MSR_TYPE_R); +
>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +					msr,
>>> MSR_TYPE_R); +} + +static void vmx_disable_intercept_for_msr_write(u32
>>> msr, bool longmode_only) +{ +	if (!longmode_only)
>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + 					msr,
>>> MSR_TYPE_W); +
>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +					msr,
>>> MSR_TYPE_W);
>>>  }
>>>  
>>>  /* @@ -7633,6 +7668,19 @@ static int __init vmx_init(void)
>>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
>>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
>>> +	if (enable_apicv_reg_vid) {
>>> +		int msr;
>>> +		for (msr = 0x800; msr <= 0x8ff; msr++)
>>> +			vmx_disable_intercept_for_msr_read(msr, false);
>>> +
>>> +		/* TPR */
>>> +		vmx_disable_intercept_for_msr_write(0x808, false);
>>> +		/* EOI */
>>> +		vmx_disable_intercept_for_msr_write(0x80b, false);
>>> +		/* SELF-IPI */
>>> +		vmx_disable_intercept_for_msr_write(0x83f, false);
>>> +	}
>>> +
>>>  	if (enable_ept) {
>>>  		kvm_mmu_set_mask_ptes(0ull,
>>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>>> --
>>> 1.7.1
>> 
>> --
>> 			Gleb.
> 
> 
> Best regards,
> Yang
>


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Dec. 24, 2012, 9:23 a.m. UTC | #4
On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
> Zhang, Yang Z wrote on 2012-12-24:
> > Gleb Natapov wrote on 2012-12-20:
> >> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> >>> basically to benefit from apicv, we need clear MSR bitmap for
> >>> corresponding x2apic MSRs:
> >>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
> >>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
> >> We do not set "Virtualize x2APIC mode" bit in secondary execution
> >> control. If I read the spec correctly without that those MSR read/writes
> >> will go straight to physical local APIC.
> > Right. Now it cannot get benefit, but we may enable it in future and then we can
> > benefit from it.
Without enabling it you cannot disable MSR intercept for x2apic MSRs.

> how about to add the following check:
> if (apicv_enabled && virtual_x2apic_enabled)
> 	clear_msr();
> 
I do not understand what you mean here.

> 
> >>> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
> >>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> >>> ---
> >>>  arch/x86/kvm/vmx.c |   62
> >>>  ++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed,
> >>>  55 insertions(+), 7 deletions(-)
> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>> index be66c3e..9b5e7a2 100644
> >>> --- a/arch/x86/kvm/vmx.c
> >>> +++ b/arch/x86/kvm/vmx.c
> >>> @@ -3773,7 +3773,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
> >>>  	spin_unlock(&vmx_vpid_lock);
> >>>  }
> >>> -static void __vmx_disable_intercept_for_msr(unsigned long
> >>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2
> >>> +static void __vmx_disable_intercept_for_msr(unsigned long
> >>> *msr_bitmap, + 				u32 msr, int type)
> >>>  {
> >>>  	int f = sizeof(unsigned long);
> >>> @@ -3786,20 +3789,52 @@ static void
> >> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
> >>>  	 * We can control MSRs 0x00000000-0x00001fff and
> >>>  0xc0000000-0xc0001fff. 	 */ 	if (msr <= 0x1fff) {
> >>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
> >>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
> >>> +		if (type & MSR_TYPE_R)
> >>> +			/* read-low */
> >>> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
> >>> +
> >>> +		if (type & MSR_TYPE_W)
> >>> +			/* write-low */
> >>> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
> >>> +
> >>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> >>>  		msr &= 0x1fff;
> >>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
> >>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
> >>> +		if (type & MSR_TYPE_R)
> >>> +			/* read-high */
> >>> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
> >>> +
> >>> +		if (type & MSR_TYPE_W)
> >>> +			/* write-high */
> >>> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
> >>> +
> >>>  	}
> >>>  }
> >>>  
> >>>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> >>>  {
> >>>  	if (!longmode_only)
> >>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
> >>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
> >>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + 					msr,
> >>> MSR_TYPE_R | MSR_TYPE_W);
> >>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, + 					msr,
> >>> MSR_TYPE_R | MSR_TYPE_W); +} + +static void
> >>> vmx_disable_intercept_for_msr_read(u32 msr, bool longmode_only) +{ +
> >>> 	if (!longmode_only)
> >>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + 					msr,
> >>> MSR_TYPE_R); +
> >>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +					msr,
> >>> MSR_TYPE_R); +} + +static void vmx_disable_intercept_for_msr_write(u32
> >>> msr, bool longmode_only) +{ +	if (!longmode_only)
> >>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + 					msr,
> >>> MSR_TYPE_W); +
> >>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +					msr,
> >>> MSR_TYPE_W);
> >>>  }
> >>>  
> >>>  /* @@ -7633,6 +7668,19 @@ static int __init vmx_init(void)
> >>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> >>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> >>> +	if (enable_apicv_reg_vid) {
> >>> +		int msr;
> >>> +		for (msr = 0x800; msr <= 0x8ff; msr++)
> >>> +			vmx_disable_intercept_for_msr_read(msr, false);
> >>> +
> >>> +		/* TPR */
> >>> +		vmx_disable_intercept_for_msr_write(0x808, false);
> >>> +		/* EOI */
> >>> +		vmx_disable_intercept_for_msr_write(0x80b, false);
> >>> +		/* SELF-IPI */
> >>> +		vmx_disable_intercept_for_msr_write(0x83f, false);
> >>> +	}
> >>> +
> >>>  	if (enable_ept) {
> >>>  		kvm_mmu_set_mask_ptes(0ull,
> >>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> >>> --
> >>> 1.7.1
> >> 
> >> --
> >> 			Gleb.
> > 
> > 
> > Best regards,
> > Yang
> >
> 
> 
> Best regards,
> Yang
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 24, 2012, 11:53 p.m. UTC | #5
Gleb Natapov wrote on 2012-12-24:
> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
>> Zhang, Yang Z wrote on 2012-12-24:
>>> Gleb Natapov wrote on 2012-12-20:
>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>>>>> basically to benefit from apicv, we need clear MSR bitmap for
>>>>> corresponding x2apic MSRs:
>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
>>>> We do not set "Virtualize x2APIC mode" bit in secondary execution
>>>> control. If I read the spec correctly without that those MSR read/writes
>>>> will go straight to physical local APIC.
>>> Right. Now it cannot get benefit, but we may enable it in future and
>>> then we can benefit from it.
> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
> 
>> how about to add the following check:
>> if (apicv_enabled && virtual_x2apic_enabled)
>> 	clear_msr();
>> 
> I do not understand what do you mean here.
This patch clears the MSR bitmap (0x800-0x8ff) whenever apicv is enabled. As you said, since KVM doesn't set "virtualize x2apic mode", APIC register virtualization never takes effect. So we should clear the MSR bitmap only when apicv is enabled and "virtualize x2apic mode" is set.

>> 
>>>>> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
>>>>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>>>>> ---
>>>>>  arch/x86/kvm/vmx.c |   62
>>>>>  ++++++++++++++++++++++++++++++++++++++++++++++------ 1 files
>>>>>  changed, 55 insertions(+), 7 deletions(-)
>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>>> index be66c3e..9b5e7a2 100644
>>>>> --- a/arch/x86/kvm/vmx.c
>>>>> +++ b/arch/x86/kvm/vmx.c
>>>>> @@ -3773,7 +3773,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
>>>>>  	spin_unlock(&vmx_vpid_lock);
>>>>>  }
>>>>> -static void __vmx_disable_intercept_for_msr(unsigned long
>>>>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2
>>>>> +static void __vmx_disable_intercept_for_msr(unsigned long
>>>>> *msr_bitmap, + 				u32 msr, int type)
>>>>>  {
>>>>>  	int f = sizeof(unsigned long);
>>>>> @@ -3786,20 +3789,52 @@ static void
>>>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>>>>  	 * We can control MSRs 0x00000000-0x00001fff and
>>>>>  0xc0000000-0xc0001fff. 	 */ 	if (msr <= 0x1fff) {
>>>>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>>>>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>>>>> +		if (type & MSR_TYPE_R)
>>>>> +			/* read-low */
>>>>> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
>>>>> +
>>>>> +		if (type & MSR_TYPE_W)
>>>>> +			/* write-low */
>>>>> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
>>>>> +
>>>>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>>>>  		msr &= 0x1fff;
>>>>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>>>>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>>>>> +		if (type & MSR_TYPE_R)
>>>>> +			/* read-high */
>>>>> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
>>>>> +
>>>>> +		if (type & MSR_TYPE_W)
>>>>> +			/* write-high */
>>>>> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
>>>>> +
>>>>>  	}
>>>>>  }
>>>>>  
>>>>>  static void vmx_disable_intercept_for_msr(u32 msr, bool
>>>>>  longmode_only) { 	if (!longmode_only)
>>>>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>>>>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>>>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +
>>>>> 					msr, MSR_TYPE_R | MSR_TYPE_W);
>>>>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +
>>>>> 					msr, MSR_TYPE_R | MSR_TYPE_W); +} + +static void
>>>>> vmx_disable_intercept_for_msr_read(u32 msr, bool longmode_only) +{ +
>>>>> 	if (!longmode_only)
>>>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +
>>>>> 					msr, MSR_TYPE_R); +
>>>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +
>>>>> 					msr, MSR_TYPE_R); +} + +static void
>>>>> vmx_disable_intercept_for_msr_write(u32 msr, bool longmode_only) +{
>>>>> +	if (!longmode_only)
>>>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +
>>>>> 					msr, MSR_TYPE_W); +
>>>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +
>>>>> 					msr, MSR_TYPE_W);
>>>>>  }
>>>>>  
>>>>>  /* @@ -7633,6 +7668,19 @@ static int __init vmx_init(void)
>>>>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
>>>>>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
>>>>> +	if (enable_apicv_reg_vid) {
>>>>> +		int msr;
>>>>> +		for (msr = 0x800; msr <= 0x8ff; msr++)
>>>>> +			vmx_disable_intercept_for_msr_read(msr, false);
>>>>> +
>>>>> +		/* TPR */
>>>>> +		vmx_disable_intercept_for_msr_write(0x808, false);
>>>>> +		/* EOI */
>>>>> +		vmx_disable_intercept_for_msr_write(0x80b, false);
>>>>> +		/* SELF-IPI */
>>>>> +		vmx_disable_intercept_for_msr_write(0x83f, false);
>>>>> +	}
>>>>> +
>>>>>  	if (enable_ept) {
>>>>>  		kvm_mmu_set_mask_ptes(0ull,
>>>>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>>>>> --
>>>>> 1.7.1
>>>> 
>>>> --
>>>> 			Gleb.
>>> 
>>> 
>>> Best regards,
>>> Yang
>>> 
>> 
>> 
>> Best regards,
>> Yang
>> 
> 
> --
> 			Gleb.


Best regards,
Yang

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Dec. 25, 2012, 6:38 a.m. UTC | #6
On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-12-24:
> > On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
> >> Zhang, Yang Z wrote on 2012-12-24:
> >>> Gleb Natapov wrote on 2012-12-20:
> >>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> >>>>> basically to benefit from apicv, we need clear MSR bitmap for
> >>>>> corresponding x2apic MSRs:
> >>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
> >>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
> >>>> We do not set "Virtualize x2APIC mode" bit in secondary execution
> >>>> control. If I read the spec correctly without that those MSR read/writes
> >>>> will go straight to physical local APIC.
> >>> Right. Now it cannot get benefit, but we may enable it in future and
> >>> then we can benefit from it.
> > Without enabling it you cannot disable MSR intercept for x2apic MSRs.
> > 
> >> how about to add the following check:
> >> if (apicv_enabled && virtual_x2apic_enabled)
> >> 	clear_msr();
> >> 
> > I do not understand what do you mean here.
> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled. As you said, since kvm doesn't set "virtualize x2apic mode", APIC register virtualization never take effect. So we need to clear MSR bitmap only when apicv enabled and virtualize x2apic mode set.
> 
But currently it is never set.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 25, 2012, 6:42 a.m. UTC | #7
Gleb Natapov wrote on 2012-12-25:
> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2012-12-24:
>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
>>>> Zhang, Yang Z wrote on 2012-12-24:
>>>>> Gleb Natapov wrote on 2012-12-20:
>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
>>>>>>> corresponding x2apic MSRs:
>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>>>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary execution
>>>>>> control. If I read the spec correctly without that those MSR read/writes
>>>>>> will go straight to physical local APIC.
>>>>> Right. Now it cannot get benefit, but we may enable it in future and
>>>>> then we can benefit from it.
>>> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
>>> 
>>>> how about to add the following check:
>>>> if (apicv_enabled && virtual_x2apic_enabled)
>>>> 	clear_msr();
>>>> 
>>> I do not understand what do you mean here.
>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled. As you
> said, since kvm doesn't set "virtualize x2apic mode", APIC register virtualization
> never take effect. So we need to clear MSR bitmap only when apicv enabled and
> virtualize x2apic mode set.
>> 
> But currently it is never set.
So you think the third patch is not necessary for now, unless we enable "virtualize x2apic mode"?

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Dec. 25, 2012, 6:50 a.m. UTC | #8
On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-12-25:
> > On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2012-12-24:
> >>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
> >>>> Zhang, Yang Z wrote on 2012-12-24:
> >>>>> Gleb Natapov wrote on 2012-12-20:
> >>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> >>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
> >>>>>>> corresponding x2apic MSRs:
> >>>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
> >>>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
> >>>>>> We do not set "Virtualize x2APIC mode" bit in secondary execution
> >>>>>> control. If I read the spec correctly without that those MSR read/writes
> >>>>>> will go straight to physical local APIC.
> >>>>> Right. Now it cannot get benefit, but we may enable it in future and
> >>>>> then we can benefit from it.
> >>> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
> >>> 
> >>>> how about to add the following check:
> >>>> if (apicv_enabled && virtual_x2apic_enabled)
> >>>> 	clear_msr();
> >>>> 
> >>> I do not understand what do you mean here.
> >> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled. As you
> > said, since kvm doesn't set "virtualize x2apic mode", APIC register virtualization
> > never take effect. So we need to clear MSR bitmap only when apicv enabled and
> > virtualize x2apic mode set.
> >> 
> > But currently it is never set.
> So you think the third patch is not necessary currently? Unless we enabled "virtualize x2apic mode".
> 
Without the third patch vid will not work properly if a guest is in x2apic
mode. Actually the second and third patches need to be reordered so that there
is no window where x2apic is broken. The problem is that this patch itself
is buggy since it does not set the "virtualize x2apic mode" flag. It should
set the flag if vid is enabled, and if the flag cannot be set vid should
be forced off.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 25, 2012, 7:25 a.m. UTC | #9
Gleb Natapov wrote on 2012-12-25:
> On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2012-12-25:
>>> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2012-12-24:
>>>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
>>>>>> Zhang, Yang Z wrote on 2012-12-24:
>>>>>>> Gleb Natapov wrote on 2012-12-20:
>>>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>>>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
>>>>>>>>> corresponding x2apic MSRs:
>>>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>>>>>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
>>>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary execution
>>>>>>>> control. If I read the spec correctly without that those MSR read/writes
>>>>>>>> will go straight to physical local APIC.
>>>>>>> Right. Now it cannot get benefit, but we may enable it in future and
>>>>>>> then we can benefit from it.
>>>>> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
>>>>> 
>>>>>> how about to add the following check:
>>>>>> if (apicv_enabled && virtual_x2apic_enabled)
>>>>>> 	clear_msr();
>>>>>> 
>>>>> I do not understand what do you mean here.
>>>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled. As
> you
>>> said, since kvm doesn't set "virtualize x2apic mode", APIC register
>>> virtualization never take effect. So we need to clear MSR bitmap only
>>> when apicv enabled and virtualize x2apic mode set.
>>>> 
>>> But currently it is never set.
>> So you think the third patch is not necessary currently? Unless we
>> enabled "virtualize x2apic mode".
>> 
> Without third patch vid will not work properly if a guest is in x2apic
> mode. Actually second and third patches need to be reordered to not have
> a windows where x2apic is broken. The problem is that this patch itself
> is buggy since it does not set "virtualize x2apic mode" flag. It should
> set the flag if vid is enabled and if the flag cannot be set vid should
> be forced off.
Under what conditions can this flag not be set? I think the only case is that KVM doesn't expose the x2apic capability to the guest. If this is true, the guest will never use x2apic and we can still use vid.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Dec. 25, 2012, 7:31 a.m. UTC | #10
On Tue, Dec 25, 2012 at 07:25:15AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-12-25:
> > On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2012-12-25:
> >>> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2012-12-24:
> >>>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
> >>>>>> Zhang, Yang Z wrote on 2012-12-24:
> >>>>>>> Gleb Natapov wrote on 2012-12-20:
> >>>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> >>>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
> >>>>>>>>> corresponding x2apic MSRs:
> >>>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
> >>>>>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery
> >>>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary execution
> >>>>>>>> control. If I read the spec correctly without that those MSR read/writes
> >>>>>>>> will go straight to physical local APIC.
> >>>>>>> Right. Now it cannot get benefit, but we may enable it in future and
> >>>>>>> then we can benefit from it.
> >>>>> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
> >>>>> 
> >>>>>> how about to add the following check:
> >>>>>> if (apicv_enabled && virtual_x2apic_enabled)
> >>>>>> 	clear_msr();
> >>>>>> 
> >>>>> I do not understand what do you mean here.
> >>>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled. As
> > you
> >>> said, since kvm doesn't set "virtualize x2apic mode", APIC register
> >>> virtualization never take effect. So we need to clear MSR bitmap only
> >>> when apicv enabled and virtualize x2apic mode set.
> >>>> 
> >>> But currently it is never set.
> >> So you think the third patch is not necessary currently? Unless we
> >> enabled "virtualize x2apic mode".
> >> 
> > Without third patch vid will not work properly if a guest is in x2apic
> > mode. Actually second and third patches need to be reordered to not have
> > a windows where x2apic is broken. The problem is that this patch itself
> > is buggy since it does not set "virtualize x2apic mode" flag. It should
> > set the flag if vid is enabled and if the flag cannot be set vid should
> > be forced off.
> In what conditions this flag cannot be set? I think the only case is that KVM doesn't expose the x2apic capability to guest, if this is true, the guest will never use x2apic and we still can use vid.
> 
We can indeed set "virtualize x2apic mode" unconditionally since it does
not have any effect if x2apic MSRs are intercepted.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 25, 2012, 7:46 a.m. UTC | #11
Gleb Natapov wrote on 2012-12-25:
> On Tue, Dec 25, 2012 at 07:25:15AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2012-12-25:
>>> On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2012-12-25:
>>>>> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2012-12-24:
>>>>>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
>>>>>>>> Zhang, Yang Z wrote on 2012-12-24:
>>>>>>>>> Gleb Natapov wrote on 2012-12-20:
>>>>>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>>>>>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
>>>>>>>>>>> corresponding x2apic MSRs:
>>>>>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
>>>>>>>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt
> delivery
>>>>>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary
>>>>>>>>>> execution control. If I read the spec correctly without that
>>>>>>>>>> those MSR read/writes will go straight to physical local APIC.
>>>>>>>>> Right. Now it cannot get benefit, but we may enable it in future and
>>>>>>>>> then we can benefit from it.
>>>>>>> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
>>>>>>> 
>>>>>>>> how about to add the following check:
>>>>>>>> if (apicv_enabled && virtual_x2apic_enabled)
>>>>>>>> 	clear_msr();
>>>>>>>> 
>>>>>>> I do not understand what do you mean here.
>>>>>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled.
> As
>>> you
>>>>> said, since kvm doesn't set "virtualize x2apic mode", APIC register
>>>>> virtualization never take effect. So we need to clear MSR bitmap only
>>>>> when apicv enabled and virtualize x2apic mode set.
>>>>>> 
>>>>> But currently it is never set.
>>>> So you think the third patch is not necessary currently? Unless we
>>>> enabled "virtualize x2apic mode".
>>>> 
>>> Without third patch vid will not work properly if a guest is in x2apic
>>> mode. Actually second and third patches need to be reordered to not have
>>> a windows where x2apic is broken. The problem is that this patch itself
>>> is buggy since it does not set "virtualize x2apic mode" flag. It should
>>> set the flag if vid is enabled and if the flag cannot be set vid should
>>> be forced off.
>> In what conditions this flag cannot be set? I think the only case is that KVM
> doesn't expose the x2apic capability to guest, if this is true, the guest will never
> use x2apic and we still can use vid.
>> 
> We can indeed set "virtualize x2apic mode" unconditionally since it does
> not take any effect if x2apic MSRs are intercepted.
No. Since "Virtual APIC access" must be cleared if "virtualize x2apic mode" is set, if the guest still uses xAPIC there will be lots of EPT violations for APIC access emulation. This will hurt performance.
We should only set "virtualize x2apic mode" when the guest really uses x2apic (i.e. the guest sets bit 11 of APIC_BASE_MSR).

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Dec. 25, 2012, 7:52 a.m. UTC | #12
On Tue, Dec 25, 2012 at 07:46:53AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-12-25:
> > On Tue, Dec 25, 2012 at 07:25:15AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2012-12-25:
> >>> On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2012-12-25:
> >>>>> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2012-12-24:
> >>>>>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
> >>>>>>>> Zhang, Yang Z wrote on 2012-12-24:
> >>>>>>>>> Gleb Natapov wrote on 2012-12-20:
> >>>>>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> >>>>>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
> >>>>>>>>>>> corresponding x2apic MSRs:
> >>>>>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization
> >>>>>>>>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt
> > delivery
> >>>>>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary
> >>>>>>>>>> execution control. If I read the spec correctly without that
> >>>>>>>>>> those MSR read/writes will go straight to physical local APIC.
> >>>>>>>>> Right. Now it cannot get benefit, but we may enable it in future and
> >>>>>>>>> then we can benefit from it.
> >>>>>>> Without enabling it you cannot disable MSR intercept for x2apic MSRs.
> >>>>>>> 
> >>>>>>>> how about to add the following check:
> >>>>>>>> if (apicv_enabled && virtual_x2apic_enabled)
> >>>>>>>> 	clear_msr();
> >>>>>>>> 
> >>>>>>> I do not understand what do you mean here.
> >>>>>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv enabled.
> > As
> >>> you
> >>>>> said, since kvm doesn't set "virtualize x2apic mode", APIC register
> >>>>> virtualization never take effect. So we need to clear MSR bitmap only
> >>>>> when apicv enabled and virtualize x2apic mode set.
> >>>>>> 
> >>>>> But currently it is never set.
> >>>> So you think the third patch is not necessary currently? Unless we
> >>>> enabled "virtualize x2apic mode".
> >>>> 
> >>> Without third patch vid will not work properly if a guest is in x2apic
> >>> mode. Actually second and third patches need to be reordered to not have
> >>> a windows where x2apic is broken. The problem is that this patch itself
> >>> is buggy since it does not set "virtualize x2apic mode" flag. It should
> >>> set the flag if vid is enabled and if the flag cannot be set vid should
> >>> be forced off.
> >> In what conditions this flag cannot be set? I think the only case is that KVM
> > doesn't expose the x2apic capability to guest, if this is true, the guest will never
> > use x2apic and we still can use vid.
> >> 
> > We can indeed set "virtualize x2apic mode" unconditionally since it does
> > not take any effect if x2apic MSRs are intercepted.
> No. Since "Virtual APIC access" must be cleared if "virtualize x2apic mode" is set, and if guest still use xAPIC, then there should be lots of ept violations for apic access emulation. This will hurt performance.
Stupid HW, why this pointless limitation? Can you point me to where the SDM says that?

> We should only set "virtualize x2apic mode" when guest really uses x2apic(guest set bit 11 of APIC_BASE_MSR).
> 
Looks like the SDM forces us to.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z Dec. 25, 2012, 8:24 a.m. UTC | #13
Gleb Natapov wrote on 2012-12-25:
> On Tue, Dec 25, 2012 at 07:46:53AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2012-12-25:
>>> On Tue, Dec 25, 2012 at 07:25:15AM +0000, Zhang, Yang Z wrote:
>>>> Gleb Natapov wrote on 2012-12-25:
>>>>> On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
>>>>>> Gleb Natapov wrote on 2012-12-25:
>>>>>>> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
>>>>>>>> Gleb Natapov wrote on 2012-12-24:
>>>>>>>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
>>>>>>>>>> Zhang, Yang Z wrote on 2012-12-24:
>>>>>>>>>>> Gleb Natapov wrote on 2012-12-20:
>>>>>>>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
>>>>>>>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
>>>>>>>>>>>>> corresponding x2apic MSRs:
>>>>>>>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register
>>>>>>>>>>>>>     virtualization TPR,EOI,SELF-IPI: no write intercept for
>>>>>>>>>>>>>     virtual interrupt
>>> delivery
>>>>>>>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary
>>>>>>>>>>>> execution control. If I read the spec correctly without that
>>>>>>>>>>>> those MSR read/writes will go straight to physical local APIC.
>>>>>>>>>>> Right. Now it cannot get benefit, but we may enable it in
>>>>>>>>>>> future and then we can benefit from it.
>>>>>>>>> Without enabling it you cannot disable MSR intercept for x2apic
>>>>>>>>> MSRs.
>>>>>>>>> 
>>>>>>>>>> how about to add the following check:
>>>>>>>>>> if (apicv_enabled && virtual_x2apic_enabled)
>>>>>>>>>> 	clear_msr();
>>>>>>>>>> 
>>>>>>>>> I do not understand what do you mean here.
>>>>>>>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv
> enabled.
>>> As
>>>>> you
>>>>>>> said, since kvm doesn't set "virtualize x2apic mode", APIC register
>>>>>>> virtualization never take effect. So we need to clear MSR bitmap only
>>>>>>> when apicv enabled and virtualize x2apic mode set.
>>>>>>>> 
>>>>>>> But currently it is never set.
>>>>>> So you think the third patch is not necessary currently? Unless we
>>>>>> enabled "virtualize x2apic mode".
>>>>>> 
>>>>> Without third patch vid will not work properly if a guest is in x2apic
>>>>> mode. Actually second and third patches need to be reordered to not have
>>>>> a windows where x2apic is broken. The problem is that this patch itself
>>>>> is buggy since it does not set "virtualize x2apic mode" flag. It should
>>>>> set the flag if vid is enabled and if the flag cannot be set vid should
>>>>> be forced off.
>>>> In what conditions this flag cannot be set? I think the only case is that KVM
>>> doesn't expose the x2apic capability to guest, if this is true, the
>>> guest will never use x2apic and we still can use vid.
>>>> 
>>> We can indeed set "virtualize x2apic mode" unconditionally since it does
>>> not take any effect if x2apic MSRs are intercepted.
>> No. Since "Virtual APIC access" must be cleared if "virtualize x2apic mode" is set,
> and if guest still use xAPIC, then there should be lots of ept violations for apic
> access emulation. This will hurt performance.
> Stupid HW, why this pointless limitation? Can you point me where SDM says that?
Vol 3, 26.2.1.1

>> We should only set "virtualize x2apic mode" when guest really uses
>> x2apic(guest set bit 11 of APIC_BASE_MSR).
>> 
> Looks like SDM force us to.
> 
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Dec. 25, 2012, 11:58 a.m. UTC | #14
On Tue, Dec 25, 2012 at 08:24:43AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-12-25:
> > On Tue, Dec 25, 2012 at 07:46:53AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2012-12-25:
> >>> On Tue, Dec 25, 2012 at 07:25:15AM +0000, Zhang, Yang Z wrote:
> >>>> Gleb Natapov wrote on 2012-12-25:
> >>>>> On Tue, Dec 25, 2012 at 06:42:59AM +0000, Zhang, Yang Z wrote:
> >>>>>> Gleb Natapov wrote on 2012-12-25:
> >>>>>>> On Mon, Dec 24, 2012 at 11:53:37PM +0000, Zhang, Yang Z wrote:
> >>>>>>>> Gleb Natapov wrote on 2012-12-24:
> >>>>>>>>> On Mon, Dec 24, 2012 at 02:35:35AM +0000, Zhang, Yang Z wrote:
> >>>>>>>>>> Zhang, Yang Z wrote on 2012-12-24:
> >>>>>>>>>>> Gleb Natapov wrote on 2012-12-20:
> >>>>>>>>>>>> On Mon, Dec 17, 2012 at 01:30:50PM +0800, Yang Zhang wrote:
> >>>>>>>>>>>>> basically to benefit from apicv, we need clear MSR bitmap for
> >>>>>>>>>>>>> corresponding x2apic MSRs:
> >>>>>>>>>>>>>     0x800 - 0x8ff: no read intercept for apicv register
> >>>>>>>>>>>>>     virtualization TPR,EOI,SELF-IPI: no write intercept for
> >>>>>>>>>>>>>     virtual interrupt
> >>> delivery
> >>>>>>>>>>>> We do not set "Virtualize x2APIC mode" bit in secondary
> >>>>>>>>>>>> execution control. If I read the spec correctly without that
> >>>>>>>>>>>> those MSR read/writes will go straight to physical local APIC.
> >>>>>>>>>>> Right. Now it cannot get benefit, but we may enable it in
> >>>>>>>>>>> future and then we can benefit from it.
> >>>>>>>>> Without enabling it you cannot disable MSR intercept for x2apic
> >>>>>>>>> MSRs.
> >>>>>>>>> 
> >>>>>>>>>> how about to add the following check:
> >>>>>>>>>> if (apicv_enabled && virtual_x2apic_enabled)
> >>>>>>>>>> 	clear_msr();
> >>>>>>>>>> 
> >>>>>>>>> I do not understand what do you mean here.
> >>>>>>>> In this patch, it will clear MSR bitmap(0x800 -0x8ff) when apicv
> > enabled.
> >>> As
> >>>>> you
> >>>>>>> said, since kvm doesn't set "virtualize x2apic mode", APIC register
> >>>>>>> virtualization never take effect. So we need to clear MSR bitmap only
> >>>>>>> when apicv enabled and virtualize x2apic mode set.
> >>>>>>>> 
> >>>>>>> But currently it is never set.
> >>>>>> So you think the third patch is not necessary currently? Unless we
> >>>>>> enabled "virtualize x2apic mode".
> >>>>>> 
> >>>>> Without third patch vid will not work properly if a guest is in x2apic
> >>>>> mode. Actually second and third patches need to be reordered to not have
> >>>>> a windows where x2apic is broken. The problem is that this patch itself
> >>>>> is buggy since it does not set "virtualize x2apic mode" flag. It should
> >>>>> set the flag if vid is enabled and if the flag cannot be set vid should
> >>>>> be forced off.
> >>>> In what conditions this flag cannot be set? I think the only case is that KVM
> >>> doesn't expose the x2apic capability to guest, if this is true, the
> >>> guest will never use x2apic and we still can use vid.
> >>>> 
> >>> We can indeed set "virtualize x2apic mode" unconditionally since it does
> >>> not take any effect if x2apic MSRs are intercepted.
> >> No. Since "Virtual APIC access" must be cleared if "virtualize x2apic mode" is set,
> > and if guest still use xAPIC, then there should be lots of ept violations for apic
> > access emulation. This will hurt performance.
> > Stupid HW, why this pointless limitation? Can you point me where SDM says that?
> Vol 3, 26.2.1.1
> 
Thanks.

> >> We should only set "virtualize x2apic mode" when guest really uses
> >> x2apic(guest set bit 11 of APIC_BASE_MSR).
> >> 
> > Looks like SDM force us to.
> > 
And we can disable x2apic MSR interception only after "virtualize x2apic
mode" is set, i.e. when the guest sets bit 11 of APIC_BASE_MSR.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index be66c3e..9b5e7a2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3773,7 +3773,10 @@  static void free_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
 
@@ -3786,20 +3789,52 @@  static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
 	 */
 	if (msr <= 0x1fff) {
-		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
-		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__clear_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__clear_bit(msr, msr_bitmap + 0x800 / f);
+
 	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 		msr &= 0x1fff;
-		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
-		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__clear_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__clear_bit(msr, msr_bitmap + 0xc00 / f);
+
 	}
 }
 
 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
 {
 	if (!longmode_only)
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_disable_intercept_for_msr_read(u32 msr, bool longmode_only)
+{
+	if (!longmode_only)
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_R);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+					msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_for_msr_write(u32 msr, bool longmode_only)
+{
+	if (!longmode_only)
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+					msr, MSR_TYPE_W);
 }
 
 /*
@@ -7633,6 +7668,19 @@  static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
+	if (enable_apicv_reg_vid) {
+		int msr;
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_for_msr_read(msr, false);
+
+		/* TPR */
+		vmx_disable_intercept_for_msr_write(0x808, false);
+		/* EOI */
+		vmx_disable_intercept_for_msr_write(0x80b, false);
+		/* SELF-IPI */
+		vmx_disable_intercept_for_msr_write(0x83f, false);
+	}
+
 	if (enable_ept) {
 		kvm_mmu_set_mask_ptes(0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,