Message ID: CACzj_yUu5omqFL7u8NE6vyqXGy_-dAeukRspJha+EiGXVBzmYg@mail.gmail.com (mailing list archive)
State: New, archived
Wincy Van wrote on 2015-01-24:
> Currently, if L1 enables MSR_BITMAP, we will emulate this feature: all of
> L2's msr accesses are intercepted by L0. Since many features, like
> virtualize x2apic mode, have complicated logic that is difficult for us to
> emulate, we should use the hardware and merge the bitmaps.
>
> This patch introduces nested_vmx_merge_msr_bitmap for future use.
>
> Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
> ---
>  arch/x86/kvm/vmx.c |   71 +++++++++++++++++++++++++++++++++++++++++--------
>  1 files changed, 60 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c987374..36d0724 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -798,6 +798,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
>  static unsigned long *vmx_msr_bitmap_longmode;
>  static unsigned long *vmx_msr_bitmap_legacy_x2apic;
>  static unsigned long *vmx_msr_bitmap_longmode_x2apic;
> +static unsigned long *vmx_msr_bitmap_nested;
>  static unsigned long *vmx_vmread_bitmap;
>  static unsigned long *vmx_vmwrite_bitmap;
>
> @@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
>                                 (unsigned long *)__get_free_page(GFP_KERNEL);
>         if (!vmx_msr_bitmap_longmode_x2apic)
>                 goto out4;
> +
> +       vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
> +       if (!vmx_msr_bitmap_nested)
> +               goto out5;
> +

Since nested virtualization is off by default, it's better to allocate the
page only when nested is true. Maybe adding the following check is better:

	if (nested) {
		vmx_msr_bitmap_nested =
			(unsigned long *)__get_free_page(GFP_KERNEL);
		if (!vmx_msr_bitmap_nested)
			goto out5;
	}

...snip...

> +
> +/*
> + * Merge L0's and L1's MSR bitmap, return false to indicate that
> + * we do not use the hardware.
> + */
> +static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
> +                                              struct vmcs12 *vmcs12)
> +{
> +       return false;
> +}
> +

The following patches have nothing to do with the MSR control. Why leave the
function empty here?

Best regards,
Yang
Wincy Van wrote on 2015-01-24:
> When L2 is using x2apic, we can use virtualize x2apic mode to gain higher
> performance, especially in the apicv case.
>
> This patch also introduces nested_vmx_check_apicv_controls for the nested
> apicv patches.
>
> Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
> ---

...snip...

> static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> {
>         if (!longmode_only)
> @@ -8344,7 +8394,68 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
>  static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>                                                 struct vmcs12 *vmcs12)
>  {
> -       return false;
> +       struct page *page;
> +       unsigned long *msr_bitmap;
> +
> +       if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
> +               return false;
> +
> +       page = nested_get_page(vcpu, vmcs12->msr_bitmap);
> +       if (!page) {
> +               WARN_ON(1);
> +               return false;
> +       }
> +       msr_bitmap = (unsigned long *)kmap(page);
> +       if (!msr_bitmap) {
> +               nested_release_page_clean(page);
> +               WARN_ON(1);
> +               return false;
> +       }
> +
> +       memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
> +
> +       if (nested_cpu_has_virt_x2apic_mode(vmcs12))
> +               /* TPR is allowed */
> +               nested_vmx_disable_intercept_for_msr(msr_bitmap,
> +                               vmx_msr_bitmap_nested,
> +                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
> +                               MSR_TYPE_R | MSR_TYPE_W);

I didn't understand what this function does. Per my understanding, you only
need to set (vmx_msr_bitmap_nested = vmcs01->msr_bitmap | vmcs12->msr_bitmap)
and inject a nested vmexit to L1 if the bit in vmcs12->msr_bitmap is set.
Am I missing some patches?

Best regards,
Yang
Zhang, Yang Z wrote on 2015-01-28:
> Wincy Van wrote on 2015-01-24:
>> When L2 is using x2apic, we can use virtualize x2apic mode to gain
>> higher performance, especially in the apicv case.
>>
>> This patch also introduces nested_vmx_check_apicv_controls for the
>> nested apicv patches.

Sorry, replied on the wrong thread. :(

...snip...

Best regards,
Yang
On Wed, Jan 28, 2015 at 4:00 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> @@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
>>                                 (unsigned long *)__get_free_page(GFP_KERNEL);
>>         if (!vmx_msr_bitmap_longmode_x2apic)
>>                 goto out4;
>> +
>> +       vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
>> +       if (!vmx_msr_bitmap_nested)
>> +               goto out5;
>> +
>
> Since nested virtualization is off by default, it's better to allocate the
> page only when nested is true. Maybe adding the following check is better:
>
> if (nested) {
>         vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
>         if (!vmx_msr_bitmap_nested)
>                 goto out5;
> }

Agreed. Will do.

> ...snip...
>
>> +
>> +/*
>> + * Merge L0's and L1's MSR bitmap, return false to indicate that
>> + * we do not use the hardware.
>> + */
>> +static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>> +                                              struct vmcs12 *vmcs12)
>> +{
>> +       return false;
>> +}
>> +
>
> The following patches have nothing to do with the MSR control. Why leave
> the function empty here?

No. In the patch "Enable nested virtualize x2apic mode", we will return
false if L1 disabled virt_x2apic_mode; then the hardware MSR-bitmap control
is disabled. This is faster than rebuilding vmx_msr_bitmap_nested.
This function returns false here to indicate that we do not use the hardware.
Since it is not only virtualize x2apic mode that uses this (other features
may use it as well), I think it isn't suitable to introduce this function in
the other patches.

> Best regards,
> Yang
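For reference, the contract described above is visible in prepare_vmcs02()
in the patch at the end of this thread: when the merge helper returns false,
the MSR-bitmap exec control is simply cleared, so every msr access by L2
exits to L0. Excerpted from the diff below:

	/* From this patch's prepare_vmcs02(): use the merged bitmap only
	 * when nested_vmx_merge_msr_bitmap() succeeds; otherwise switch
	 * the MSR-bitmap control off and exit on every L2 msr access. */
	if (cpu_has_vmx_msr_bitmap() &&
	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
	} else
		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;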
On Wed, Jan 28, 2015 at 4:05 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> @@ -8344,7 +8394,68 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
>>  static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>                                                 struct vmcs12 *vmcs12)
>>  {
>> -       return false;
>> +       struct page *page;
>> +       unsigned long *msr_bitmap;

...snip...

>> +       if (nested_cpu_has_virt_x2apic_mode(vmcs12))
>> +               /* TPR is allowed */
>> +               nested_vmx_disable_intercept_for_msr(msr_bitmap,
>> +                               vmx_msr_bitmap_nested,
>> +                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>> +                               MSR_TYPE_R | MSR_TYPE_W);
>
> I didn't understand what this function does. Per my understanding, you only
> need to set (vmx_msr_bitmap_nested = vmcs01->msr_bitmap |
> vmcs12->msr_bitmap) and inject a nested vmexit to L1 if the bit in
> vmcs12->msr_bitmap is set. Am I missing some patches?

In the beginning, I wanted to do "vmcs01->msr_bitmap | vmcs12->msr_bitmap",
but I remember that there isn't an instruction to do a bit-or operation on
two pages efficiently, so I do the bit-or operation in
nested_vmx_disable_intercept_for_msr. If the hardware does not support this,
I think it is faster if we deal with the bits on demand.
nested_vmx_merge_msr_bitmap is used to merge L0's and L1's bitmaps; any
feature can put its logic here.

If there is a faster way, please teach me how to do it :)

Thanks,
Wincy

> Best regards,
> Yang
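For reference: there is indeed no single instruction that ORs two 4 KiB
pages, but a word-by-word loop is all it takes. A minimal sketch of the
full-page merge being discussed, with illustrative names rather than actual
KVM code:

	#define BITMAP_WORDS (PAGE_SIZE / sizeof(unsigned long))

	/* Hypothetical helper: OR two page-sized MSR bitmaps word by word.
	 * A bit set in either input keeps the msr intercepted in dst. */
	static void msr_bitmap_or(unsigned long *dst,
				  const unsigned long *bitmap_l0,
				  const unsigned long *bitmap_l1)
	{
		unsigned int i;

		for (i = 0; i < BITMAP_WORDS; i++)
			dst[i] = bitmap_l0[i] | bitmap_l1[i];
	}

The trade-off Wincy describes: this loop walks all three pages on every
nested entry, while clearing only the few pass-through bits on demand
touches just the words that actually change.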
Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 4:00 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> Since nested virtualization is off by default, it's better to allocate
>> the page only when nested is true. Maybe adding the following check is
>> better:

...snip...

> Agreed. Will do.
>
>> The following patches have nothing to do with the MSR control. Why leave
>> the function empty here?
>
> No. In the patch "Enable nested virtualize x2apic mode", we will return
> false if L1 disabled virt_x2apic_mode; then the hardware MSR-bitmap
> control is disabled. This is faster than rebuilding vmx_msr_bitmap_nested.
> This function returns false here to indicate that we do not use the
> hardware. Since it is not only virtualize x2apic mode that uses this
> (other features may use it as well), I think it isn't suitable to
> introduce this function in the other patches.

Yes, rebuilding is more costly. But your current implementation cannot
leverage the APICv feature correctly. I replied in another thread.

Best regards,
Yang
Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 4:05 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:

...snip...

>> I didn't understand what this function does. Per my understanding, you
>> only need to set (vmx_msr_bitmap_nested = vmcs01->msr_bitmap |
>> vmcs12->msr_bitmap) and inject a nested vmexit to L1 if the bit in
>> vmcs12->msr_bitmap is set. Am I missing some patches?
>
> In the beginning, I wanted to do "vmcs01->msr_bitmap |
> vmcs12->msr_bitmap", but I remember that there isn't an instruction to do
> a bit-or operation on two pages efficiently, so I do the bit-or operation
> in nested_vmx_disable_intercept_for_msr. If the hardware does not support
> this, I think it is faster if we deal with the bits on demand.
> nested_vmx_merge_msr_bitmap is used to merge L0's and L1's bitmaps; any
> feature can put its logic here.

You construct the nested_msr_bitmap based on vmcs12->msr_bitmap; what
happens if vmcs01->msr_bitmap wants to trap this msr?

> If there is a faster way, please teach me how to do it :)

You are right. Interception should be much faster.

> Thanks,
> Wincy

Best regards,
Yang
On Wed, Jan 28, 2015 at 7:25 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
> Wincy Van wrote on 2015-01-28:

...snip...

> You construct the nested_msr_bitmap based on vmcs12->msr_bitmap; what
> happens if vmcs01->msr_bitmap wants to trap this msr?

If L0 wants to intercept a msr, we should set
vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic), and
those bitmaps should only be loaded on non-nested entries. Currently we
only clear the corresponding bits if L1 enables virtualize x2apic mode; all
the other bits are set. On nested entry, we load nested_msr_bitmap; on
nested vmexit, we will restore L0's bitmap. On nested entry, L0 only cares
about L2's msr accesses, not L1's. I think that on nested entry,
nested_msr_bitmap is "vmcs01->msr_bitmap" as you mentioned above.

Thanks,
Wincy
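The restore path Wincy mentions is the existing vmx_set_msr_bitmap(); the
patch at the end of this thread calls it from load_vmcs12_host_state() on
nested vmexit. Roughly, as a simplified sketch of the vmx.c helper of that
era (details may differ):

	static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
	{
		unsigned long *msr_bitmap;

		/* Pick L0's own bitmap based on the vcpu's current mode. */
		if (irqchip_in_kernel(vcpu->kvm) &&
		    apic_x2apic_mode(vcpu->arch.apic)) {
			if (is_long_mode(vcpu))
				msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
			else
				msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
		} else {
			if (is_long_mode(vcpu))
				msr_bitmap = vmx_msr_bitmap_longmode;
			else
				msr_bitmap = vmx_msr_bitmap_legacy;
		}

		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
	}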
Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 7:25 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> Wincy Van wrote on 2015-01-28:

...snip...

>> You construct the nested_msr_bitmap based on vmcs12->msr_bitmap; what
>> happens if vmcs01->msr_bitmap wants to trap this msr?
>
> If L0 wants to intercept a msr, we should set
> vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic), and
> those bitmaps should only be loaded on non-nested entries. Currently we
> only clear the corresponding bits if L1 enables virtualize x2apic mode;
> all the other bits are set. On nested entry, we load nested_msr_bitmap;
> on nested vmexit, we will restore L0's bitmap. On nested entry, L0 only
> cares about L2's msr accesses, not L1's. I think that on nested entry,
> nested_msr_bitmap is "vmcs01->msr_bitmap" as you mentioned above.

Mmm... I think a bit set in vmcs01->msr_bitmap means that whenever the VCPU
(no matter whether in nested guest mode or not) accesses this msr, it
should cause a vmexit, not only when L1 does. That's why we need to
construct the vmcs02->msr_bitmap based on (vmcs01->msr_bitmap ORed with
vmcs12->msr_bitmap).

> Thanks,
> Wincy

Best regards,
Yang
On Wed, Jan 28, 2015 at 7:52 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> If L0 wants to intercept a msr, we should set
>> vmx_msr_bitmap_legacy(_x2apic) and vmx_msr_bitmap_longmode(_x2apic), and
>> those bitmaps should only be loaded on non-nested entries. Currently we
>> only clear the corresponding bits if L1 enables virtualize x2apic mode;
>> all the other bits are set. On nested entry, we load nested_msr_bitmap;
>> on nested vmexit, we will restore L0's bitmap. On nested entry, L0 only
>> cares about L2's msr accesses, not L1's. I think that on nested entry,
>> nested_msr_bitmap is "vmcs01->msr_bitmap" as you mentioned above.
>
> Mmm... I think a bit set in vmcs01->msr_bitmap means that whenever the
> VCPU (no matter whether in nested guest mode or not) accesses this msr,
> it should cause a vmexit, not only when L1 does. That's why we need to
> construct the vmcs02->msr_bitmap based on (vmcs01->msr_bitmap ORed with
> vmcs12->msr_bitmap).

You are right, but this does not fit all cases; we should customize
nested_msr_bitmap. E.g., currently L0 wants to intercept some of the x2apic
msrs' reads:

	if (enable_apicv) {
		for (msr = 0x800; msr <= 0x8ff; msr++)
			vmx_disable_intercept_msr_read_x2apic(msr);

		/* According SDM, in x2apic mode, the whole id reg is used.
		 * But in KVM, it only use the highest eight bits. Need to
		 * intercept it */
		vmx_enable_intercept_msr_read_x2apic(0x802);
		/* TMCCT */
		vmx_enable_intercept_msr_read_x2apic(0x839);
		/* TPR */
		vmx_disable_intercept_msr_write_x2apic(0x808);
		/* EOI */
		vmx_disable_intercept_msr_write_x2apic(0x80b);
		/* SELF-IPI */
		vmx_disable_intercept_msr_write_x2apic(0x83f);
	}

But L1 may not want this. So I think we had better deal with the msrs
separately; there is no common way that suits all cases. If other features
want to intercept a msr on nested entry, they can put the custom code in
nested_vmx_merge_msr_bitmap.

Thanks,
Wincy
Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 7:52 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:

...snip...

> You are right, but this does not fit all cases; we should customize
> nested_msr_bitmap. E.g., currently L0 wants to intercept some of the
> x2apic msrs' reads:

...snip...

> But L1 may not want this. So I think we had better deal with the

Actually, from L0's point of view, it is totally unaware of L2. The only
thing L0 is aware of is that the CPU should follow L0's configuration when
a VCPU is running. So if L0 wants to trap a msr, then a read of this msr
should cause a vmexit unconditionally, no matter who is running (L1, L2,
L3, ...).

> msrs separately; there is no common way that suits all cases. If other
> features want to intercept a msr on nested entry, they can put the custom
> code in nested_vmx_merge_msr_bitmap.

Yes, if other features want to do it on 'nested' entry, they can fill
nested_vmx_merge_msr_bitmap. But in the non-nested case, it should be our
responsibility to handle it correctly. How about adding the following check:

	if (type & MSR_TYPE_R &&
	    !test_bit(msr, vmcs01_msr_bitmap) &&
	    !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
		__clear_bit(msr, msr_bitmap_nested + 0x000 / f);

Best regards,
Yang
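For context, the helper being discussed clears a bit in the merged bitmap
only when L1 leaves the msr unintercepted; with Yang's extra check folded
in, the low-msr range would look roughly like this. This is a sketch, not
the patch: vmcs01_msr_bitmap is a hypothetical extra parameter naming L0's
active bitmap, and 0x000/0x800 are the read-low/write-low halves of the VMX
MSR-bitmap page:

	static void nested_vmx_disable_intercept_for_msr(unsigned long *vmcs01_msr_bitmap,
							 unsigned long *msr_bitmap_l1,
							 unsigned long *msr_bitmap_nested,
							 u32 msr, int type)
	{
		int f = sizeof(unsigned long);

		if (msr > 0x1fff)	/* only the low msr range in this sketch */
			return;

		/* Reads pass through only if both L0 and L1 allow them. */
		if (type & MSR_TYPE_R &&
		    !test_bit(msr, vmcs01_msr_bitmap + 0x000 / f) &&
		    !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);

		/* Writes: the same rule, in the write-low half of the page. */
		if (type & MSR_TYPE_W &&
		    !test_bit(msr, vmcs01_msr_bitmap + 0x800 / f) &&
		    !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
	}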
Zhang, Yang Z wrote on 2015-01-28:
> Wincy Van wrote on 2015-01-28:

...snip...

>> msrs separately; there is no common way that suits all cases. If other
>> features want to intercept a msr on nested entry, they can put the
>> custom code in nested_vmx_merge_msr_bitmap.
>
> Yes, if other features want to do it on 'nested' entry, they can fill
> nested_vmx_merge_msr_bitmap. But in the non-nested case, it should be our
> responsibility to handle it correctly. How about adding the following
> check:
>
> if (type & MSR_TYPE_R &&
>     !test_bit(msr, vmcs01_msr_bitmap) &&
>     !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
>         __clear_bit(msr, msr_bitmap_nested + 0x000 / f);

Anyway, this is not necessary for your current patch. We can consider it
later if other features really use it.

Best regards,
Yang
On Wed, Jan 28, 2015 at 8:33 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:

...snip...

>> Yes, if other features want to do it on 'nested' entry, they can fill
>> nested_vmx_merge_msr_bitmap. But in the non-nested case, it should be
>> our responsibility to handle it correctly. How about adding the
>> following check:
>>
>> if (type & MSR_TYPE_R &&
>>     !test_bit(msr, vmcs01_msr_bitmap) &&
>>     !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
>>         __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
>
> Anyway, this is not necessary for your current patch. We can consider it
> later if other features really use it.

Yep, I see what you mean now: for other msrs that are not forwarded by a
mechanism like the virtual-APIC page, we should intercept them
unconditionally. I think we should ensure the msr can be allowed before
calling nested_vmx_disable_intercept_for_msr; if L0 wants to intercept it,
just do not call nested_vmx_disable_intercept_for_msr.

!test_bit(msr, vmcs01_msr_bitmap) would introduce a problem: some of the
msrs would be affected by vmcs01_msr_bitmap, TMCCT and TPR, for example.
Intercepting reads of these msrs is okay, but it is not efficient.

Thanks,
Wincy
Wincy Van wrote on 2015-01-28:
> On Wed, Jan 28, 2015 at 8:33 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:

...snip...

> Yep, I see what you mean now: for other msrs that are not forwarded by a
> mechanism like the virtual-APIC page, we should intercept them
> unconditionally. I think we should ensure the msr can be allowed before
> calling nested_vmx_disable_intercept_for_msr; if L0 wants to intercept
> it, just do not call nested_vmx_disable_intercept_for_msr.

Yes, this is a solution. But I prefer to handle it in the nested code path,
since others may not be familiar with nested and may block it by mistake.

> !test_bit(msr, vmcs01_msr_bitmap) would introduce a problem: some of the
> msrs would be affected by vmcs01_msr_bitmap, TMCCT and TPR, for example.
> Intercepting reads of these msrs is okay, but it is not efficient.

TMCCT is always trapped by most VMMs. I don't think TPR is trapped in KVM.

> Thanks,
> Wincy

Best regards,
Yang
On Wed, Jan 28, 2015 at 9:06 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:

...snip...

>> !test_bit(msr, vmcs01_msr_bitmap) would introduce a problem: some of the
>> msrs would be affected by vmcs01_msr_bitmap, TMCCT and TPR, for example.
>> Intercepting reads of these msrs is okay, but it is not efficient.
>
> TMCCT is always trapped by most VMMs. I don't think TPR is trapped in KVM.

Oops. This piece of code confused me and I wasn't thinking when I wrote
that; just forget it. ( * ^ _ ^ * )

		vmx_enable_intercept_msr_read_x2apic(0x802);
		/* TMCCT */
		vmx_enable_intercept_msr_read_x2apic(0x839);
		/* TPR */

If any feature uses it in the future, let's consider this. Thank you for
your patience.

Thanks,
Wincy
Currently, if L1 enables MSR_BITMAP, we will emulate this feature: all of
L2's msr accesses are intercepted by L0. Since many features, like
virtualize x2apic mode, have complicated logic that is difficult for us to
emulate, we should use the hardware and merge the bitmaps.

This patch introduces nested_vmx_merge_msr_bitmap for future use.

Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
---
 arch/x86/kvm/vmx.c |   71 +++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c987374..36d0724 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -798,6 +798,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -5812,13 +5813,18 @@ static __init int hardware_setup(void)
 				(unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode_x2apic)
 		goto out4;
+
+	vmx_msr_bitmap_nested = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_nested)
+		goto out5;
+
 	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_vmread_bitmap)
-		goto out5;
+		goto out6;
 
 	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_vmwrite_bitmap)
-		goto out6;
+		goto out7;
 
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -5834,10 +5840,11 @@ static __init int hardware_setup(void)
 
 	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
 	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
 	if (setup_vmcs_config(&vmcs_config) < 0) {
 		r = -EIO;
-		goto out7;
+		goto out8;
 	}
 
 	if (boot_cpu_has(X86_FEATURE_NX))
@@ -5944,10 +5951,12 @@ static __init int hardware_setup(void)
 
 	return alloc_kvm_area();
 
-out7:
+out8:
 	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
+out7:
 	free_page((unsigned long)vmx_vmread_bitmap);
+out6:
+	free_page((unsigned long)vmx_msr_bitmap_nested);
 out5:
 	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
@@ -5970,6 +5979,7 @@ static __exit void hardware_unsetup(void)
 	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_msr_bitmap_nested);
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 	free_page((unsigned long)vmx_vmwrite_bitmap);
@@ -8305,6 +8315,38 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
 		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 
+static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	int maxphyaddr;
+	u64 addr;
+
+	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+		return 0;
+
+	if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
+	    ((addr + PAGE_SIZE) >> maxphyaddr))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Merge L0's and L1's MSR bitmap, return false to indicate that
+ * we do not use the hardware.
+ */
+static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
+					       struct vmcs12 *vmcs12)
+{
+	return false;
+}
+
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 				       unsigned long count_field,
 				       unsigned long addr_field,
@@ -8637,11 +8679,17 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 	}
 
+	if (cpu_has_vmx_msr_bitmap() &&
+	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
+		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+	} else
+		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
+
 	/*
-	 * Merging of IO and MSR bitmaps not currently supported.
+	 * Merging of IO bitmap not currently supported.
 	 * Rather, exit every time.
 	 */
-	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
 	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
 
@@ -8792,15 +8840,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
-	    !PAGE_ALIGNED(vmcs12->msr_bitmap)) {
+	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
 		/*TODO: Also verify bits beyond physical address width are 0*/
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
 
-	if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-		/*TODO: Also verify bits beyond physical address width are 0*/
+	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
@@ -9356,6 +9402,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	kvm_set_dr(vcpu, 7, 0x400);
 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(vcpu);
+
 	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
 				vmcs12->vm_exit_msr_load_count))
 		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
-- 
1.7.1