KVM: vmx: speed up MSR bitmap merge
diff mbox

Message ID c4ca74eb-b738-310e-1240-df33565889ab@redhat.com
State New
Headers show

Commit Message

David Hildenbrand Dec. 18, 2017, 8:45 a.m. UTC
On 13.12.2017 14:30, Paolo Bonzini wrote:
> The bulk of the MSR bitmap is either immutable, or can be copied from
> the L1 bitmap.  By initializing it at VMXON time, and copying the mutable
> parts one long at a time on vmentry (rather than one bit), about 4000
> clock cycles (30%) can be saved on a nested VMLAUNCH/VMRESUME.
> 
> The resulting for loop only has four iterations, so it is cheap enough
> to reinitialize the MSR write bitmaps on every iteration, and it makes
> the code simpler.
> 
> Suggested-by: Jim Mattson <jmattson@google.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 57 ++++++++++++++++++++++++++++--------------------------
>  1 file changed, 30 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1458cb52de68..ee214b4112af 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5217,11 +5217,6 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
>  {
>  	int f = sizeof(unsigned long);
>  
> -	if (!cpu_has_vmx_msr_bitmap()) {
> -		WARN_ON(1);
> -		return;
> -	}
> -
>  	/*
>  	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
>  	 * have the write-low and read-high bitmap offsets the wrong way round.
> @@ -7493,6 +7488,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
>  				(unsigned long *)__get_free_page(GFP_KERNEL);
>  		if (!vmx->nested.msr_bitmap)
>  			goto out_msr_bitmap;
> +		memset(vmx->nested.msr_bitmap, 0xff, PAGE_SIZE);
>  	}
>  
>  	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
> @@ -10325,36 +10321,43 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>  	/* This shortcut is ok because we support only x2APIC MSRs so far. */
>  	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>  		return false;
> +	if (WARN_ON_ONCE(!cpu_has_vmx_msr_bitmap()))
> +		return false;

IMHO it would be nicer to always call nested_vmx_merge_msr_bitmap() and
make calling code less ugly:


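diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee214b4112af..d4f06fc643ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10238,11 +10238,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
                        (unsigned long)(vmcs12->posted_intr_desc_addr &
                        (PAGE_SIZE - 1)));
        }
-       if (cpu_has_vmx_msr_bitmap() &&
-           nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
-           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
-               ;
-       else
+       if (!nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
                vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
                                CPU_BASED_USE_MSR_BITMAPS);
 }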
@@ -10318,6 +10314,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
        unsigned long *msr_bitmap_l1;
        unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;

+       if (!cpu_has_vmx_msr_bitmap())
+               return false;
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+               return false;
        /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
                return false;



>  
>  	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
>  	if (is_error_page(page))
>  		return false;
> -	msr_bitmap_l1 = (unsigned long *)kmap(page);
>  
> -	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
> +	msr_bitmap_l1 = (unsigned long *)kmap(page);


Wouldn't it be easier to simply set everything to 0xff as before and
then only handle the one special case where you don't do that? That
way, the complete else part would be gone.

> +	if (nested_cpu_has_apic_reg_virt(vmcs12)) {
> +		/* Disable read intercept for all MSRs between 0x800 and 0x8ff.  */
> +		for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
> +			unsigned word = msr / BITS_PER_LONG;
> +			msr_bitmap_l0[word] = msr_bitmap_l1[word];
> +			msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
> +		}
> +	} else {
> +		for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
> +			unsigned word = msr / BITS_PER_LONG;
> +			msr_bitmap_l0[word] = ~0;
> +			msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
> +		}
> +	}
>  
> -	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
> -		if (nested_cpu_has_apic_reg_virt(vmcs12))
> -			for (msr = 0x800; msr <= 0x8ff; msr++)
> -				nested_vmx_disable_intercept_for_msr(
> -					msr_bitmap_l1, msr_bitmap_l0,
> -					msr, MSR_TYPE_R);
> +	nested_vmx_disable_intercept_for_msr(
> +		msr_bitmap_l1, msr_bitmap_l0,
> +		APIC_BASE_MSR + (APIC_TASKPRI >> 4),
> +		MSR_TYPE_W);

I'd vote for indenting the parameters properly (even though we exceed 80
chars by 1 then :) )

>  
> +	if (nested_cpu_has_vid(vmcs12)) {
>  		nested_vmx_disable_intercept_for_msr(
> -				msr_bitmap_l1, msr_bitmap_l0,
> -				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
> -				MSR_TYPE_R | MSR_TYPE_W);
> -
> -		if (nested_cpu_has_vid(vmcs12)) {
> -			nested_vmx_disable_intercept_for_msr(
> -				msr_bitmap_l1, msr_bitmap_l0,
> -				APIC_BASE_MSR + (APIC_EOI >> 4),
> -				MSR_TYPE_W);
> -			nested_vmx_disable_intercept_for_msr(
> -				msr_bitmap_l1, msr_bitmap_l0,
> -				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
> -				MSR_TYPE_W);
> -		}
> +			msr_bitmap_l1, msr_bitmap_l0,
> +			APIC_BASE_MSR + (APIC_EOI >> 4),
> +			MSR_TYPE_W);
> +		nested_vmx_disable_intercept_for_msr(
> +			msr_bitmap_l1, msr_bitmap_l0,
> +			APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
> +			MSR_TYPE_W);
>  	}
>  	kunmap(page);
>  	kvm_release_page_clean(page);
>

Comments

Bandan Das Dec. 18, 2017, 3:51 p.m. UTC | #1
David Hildenbrand <david@redhat.com> writes:
...
>>  	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
>> @@ -10325,36 +10321,43 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>  	/* This shortcut is ok because we support only x2APIC MSRs so far. */
>>  	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>>  		return false;
>> +	if (WARN_ON_ONCE(!cpu_has_vmx_msr_bitmap()))
>> +		return false;
>
> IMHO it would be nicer to always call nested_vmx_merge_msr_bitmap() and
> make calling code less ugly:
>
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ee214b4112af..d4f06fc643ae 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10238,11 +10238,7 @@ static void nested_get_vmcs12_pages(struct
> kvm_vcpu *vcpu,
>                         (unsigned long)(vmcs12->posted_intr_desc_addr &
>                         (PAGE_SIZE - 1)));
>         }
> -       if (cpu_has_vmx_msr_bitmap() &&
> -           nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
> -           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
> -               ;
> -       else
> +       if (!nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
>                 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
>                                 CPU_BASED_USE_MSR_BITMAPS);
>  }
> @@ -10318,6 +10314,10 @@ static inline bool
> nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>         unsigned long *msr_bitmap_l1;
>         unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
>
> +       if (!cpu_has_vmx_msr_bitmap())
> +               return false;
> +       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
> +               return false;

This looks good; otherwise, the WARN_ON_ONCE just seems like an unnecessary
check, since the function is only called once.

Bandan

>         /* This shortcut is ok because we support only x2APIC MSRs so
> far. */
>         if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>                 return false;
>
>
>
>>  
>>  	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
>>  	if (is_error_page(page))
>>  		return false;
>> -	msr_bitmap_l1 = (unsigned long *)kmap(page);
>>  
>> -	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
>> +	msr_bitmap_l1 = (unsigned long *)kmap(page);
>
>
> Wouldn't it be easier to simply set everything to 0xff as before and
> then only handle the one special case where you don't do that? e.g. the
> complete else part would be gone.
>
>> +	if (nested_cpu_has_apic_reg_virt(vmcs12)) {
>> +		/* Disable read intercept for all MSRs between 0x800 and 0x8ff.  */
>> +		for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
>> +			unsigned word = msr / BITS_PER_LONG;
>> +			msr_bitmap_l0[word] = msr_bitmap_l1[word];
>> +			msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
>> +		}
>> +	} else {
>> +		for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
>> +			unsigned word = msr / BITS_PER_LONG;
>> +			msr_bitmap_l0[word] = ~0;
>> +			msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
>> +		}
>> +	}
>>  
>> -	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
>> -		if (nested_cpu_has_apic_reg_virt(vmcs12))
>> -			for (msr = 0x800; msr <= 0x8ff; msr++)
>> -				nested_vmx_disable_intercept_for_msr(
>> -					msr_bitmap_l1, msr_bitmap_l0,
>> -					msr, MSR_TYPE_R);
>> +	nested_vmx_disable_intercept_for_msr(
>> +		msr_bitmap_l1, msr_bitmap_l0,
>> +		APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>> +		MSR_TYPE_W);
>
> I'd vote for indenting the parameters properly (even though we exceed 80
> chars by 1 then :) )
>
>>  
>> +	if (nested_cpu_has_vid(vmcs12)) {
>>  		nested_vmx_disable_intercept_for_msr(
>> -				msr_bitmap_l1, msr_bitmap_l0,
>> -				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>> -				MSR_TYPE_R | MSR_TYPE_W);
>> -
>> -		if (nested_cpu_has_vid(vmcs12)) {
>> -			nested_vmx_disable_intercept_for_msr(
>> -				msr_bitmap_l1, msr_bitmap_l0,
>> -				APIC_BASE_MSR + (APIC_EOI >> 4),
>> -				MSR_TYPE_W);
>> -			nested_vmx_disable_intercept_for_msr(
>> -				msr_bitmap_l1, msr_bitmap_l0,
>> -				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
>> -				MSR_TYPE_W);
>> -		}
>> +			msr_bitmap_l1, msr_bitmap_l0,
>> +			APIC_BASE_MSR + (APIC_EOI >> 4),
>> +			MSR_TYPE_W);
>> +		nested_vmx_disable_intercept_for_msr(
>> +			msr_bitmap_l1, msr_bitmap_l0,
>> +			APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
>> +			MSR_TYPE_W);
>>  	}
>>  	kunmap(page);
>>  	kvm_release_page_clean(page);
>>
Paolo Bonzini Dec. 18, 2017, 4:03 p.m. UTC | #2
> > -	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
> > +	msr_bitmap_l1 = (unsigned long *)kmap(page);
> 
> Wouldn't it be easier to simply set everything to 0xff as before and
> then only handle the one special case where you don't do that? e.g. the
> complete else part would be gone.

It can change from one vmentry to the next, so I'm not sure how you'd
do that.

> > +	nested_vmx_disable_intercept_for_msr(
> > +		msr_bitmap_l1, msr_bitmap_l0,
> > +		APIC_BASE_MSR + (APIC_TASKPRI >> 4),
> > +		MSR_TYPE_W);
> 
> I'd vote for indenting the parameters properly (even though we exceed 80
> chars by 1 then :) )

Ok, will do.  Regarding the WARN, I'm undecided.  I like the idea of not
calling a function that works on MSR bitmaps unless you have one...

Paolo
David Hildenbrand Dec. 18, 2017, 4:13 p.m. UTC | #3
> 
>>> +	nested_vmx_disable_intercept_for_msr(
>>> +		msr_bitmap_l1, msr_bitmap_l0,
>>> +		APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>>> +		MSR_TYPE_W);
>>
>> I'd vote for indenting the parameters properly (even though we exceed 80
>> chars by 1 then :) )
> 
> Ok, will do.  Regarding the WARN, I'm undecided.  I like the idea of not
> calling a function that works on MSR bitmaps unless you have one...

At least from a documentation point of view, it should be fine:

"return false to indicate that we do not use the hardware"

Also, gcc will inline this either way.

> 
> Paolo
>

Patch
diff mbox

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee214b4112af..d4f06fc643ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10238,11 +10238,7 @@  static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
                        (unsigned long)(vmcs12->posted_intr_desc_addr &
                        (PAGE_SIZE - 1)));
        }
-       if (cpu_has_vmx_msr_bitmap() &&
-           nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
-           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
-               ;
-       else
+       if (!nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
                vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
                                CPU_BASED_USE_MSR_BITMAPS);