Message ID | 1548966284-28642-9-git-send-email-karahmed@amazon.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM/X86: Introduce a new guest mapping interface | expand |
On Thu, Jan 31, 2019 at 12:28 PM KarimAllah Ahmed <karahmed@amazon.de> wrote: > > Use kvm_vcpu_map when mapping the posted interrupt descriptor table since > using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory > that has a "struct page". > > One additional semantic change is that the virtual host mapping lifecycle > has changed a bit. It now has the same lifetime of the pinning of the > interrupt descriptor table page on the host side. > > Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> > Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > --- > v4 -> v5: > - unmap with dirty flag > > v1 -> v2: > - Do not change the lifecycle of the mapping (pbonzini) > --- > arch/x86/kvm/vmx/nested.c | 43 ++++++++++++------------------------------- > arch/x86/kvm/vmx/vmx.h | 2 +- > 2 files changed, 13 insertions(+), 32 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 31b352c..53b1063 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -230,12 +230,8 @@ static void free_nested(struct kvm_vcpu *vcpu) > vmx->nested.apic_access_page = NULL; > } > kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); > - if (vmx->nested.pi_desc_page) { > - kunmap(vmx->nested.pi_desc_page); > - kvm_release_page_dirty(vmx->nested.pi_desc_page); > - vmx->nested.pi_desc_page = NULL; > - vmx->nested.pi_desc = NULL; > - } > + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); > + vmx->nested.pi_desc = NULL; > > kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); > > @@ -2868,26 +2864,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) > } > > if (nested_cpu_has_posted_intr(vmcs12)) { > - if (vmx->nested.pi_desc_page) { /* shouldn't happen */ > - kunmap(vmx->nested.pi_desc_page); > - kvm_release_page_dirty(vmx->nested.pi_desc_page); > - vmx->nested.pi_desc_page = NULL; > - vmx->nested.pi_desc = NULL; > - vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); > + map = 
&vmx->nested.pi_desc_map;
> +
> + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
> + vmx->nested.pi_desc =
> + (struct pi_desc *)(((void *)map->hva) +
> + offset_in_page(vmcs12->posted_intr_desc_addr));
> + vmcs_write64(POSTED_INTR_DESC_ADDR,
> + pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
> }

Previously, if there was no backing page for the vmcs12->posted_intr_desc_addr, we wrote an illegal value (-1ull) into the vmcs02 POSTED_INTR_DESC_ADDR field to force VM-entry failure. Now, AFAICT, we leave that field unmodified. For a newly constructed vmcs02, doesn't that mean we're going to treat physical address 0 as the address of the vmcs02 posted interrupt descriptor?

> - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
> - if (is_error_page(page))
> - return;
> - vmx->nested.pi_desc_page = page;
> - vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
> - vmx->nested.pi_desc =
> - (struct pi_desc *)((void *)vmx->nested.pi_desc +
> - (unsigned long)(vmcs12->posted_intr_desc_addr &
> - (PAGE_SIZE - 1)));
> - vmcs_write64(POSTED_INTR_DESC_ADDR,
> - page_to_phys(vmx->nested.pi_desc_page) +
> - (unsigned long)(vmcs12->posted_intr_desc_addr &
> - (PAGE_SIZE - 1)));
> }
> if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
> vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
> @@ -3911,12 +3896,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
> vmx->nested.apic_access_page = NULL;
> }
> kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
> - if (vmx->nested.pi_desc_page) {
> - kunmap(vmx->nested.pi_desc_page);
> - kvm_release_page_dirty(vmx->nested.pi_desc_page);
> - vmx->nested.pi_desc_page = NULL;
> - vmx->nested.pi_desc = NULL;
> - }
> + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
> + vmx->nested.pi_desc = NULL;
>
> /*
> * We are now running in L2, mmu_notifier will force to reload the
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index f618f52..bd04725
100644 > --- a/arch/x86/kvm/vmx/vmx.h > +++ b/arch/x86/kvm/vmx/vmx.h > @@ -143,7 +143,7 @@ struct nested_vmx { > */ > struct page *apic_access_page; > struct kvm_host_map virtual_apic_map; > - struct page *pi_desc_page; > + struct kvm_host_map pi_desc_map; > > struct kvm_host_map msr_bitmap_map; > > -- > 2.7.4 >
On Thu, May 06, 2021, Jim Mattson wrote: > On Thu, Jan 31, 2019 at 12:28 PM KarimAllah Ahmed <karahmed@amazon.de> wrote: > > > > Use kvm_vcpu_map when mapping the posted interrupt descriptor table since > > using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory > > that has a "struct page". > > > > One additional semantic change is that the virtual host mapping lifecycle > > has changed a bit. It now has the same lifetime of the pinning of the > > interrupt descriptor table page on the host side. > > > > Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> > > Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > --- > > v4 -> v5: > > - unmap with dirty flag > > > > v1 -> v2: > > - Do not change the lifecycle of the mapping (pbonzini) > > --- > > arch/x86/kvm/vmx/nested.c | 43 ++++++++++++------------------------------- > > arch/x86/kvm/vmx/vmx.h | 2 +- > > 2 files changed, 13 insertions(+), 32 deletions(-) > > > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > > index 31b352c..53b1063 100644 > > --- a/arch/x86/kvm/vmx/nested.c > > +++ b/arch/x86/kvm/vmx/nested.c > > @@ -230,12 +230,8 @@ static void free_nested(struct kvm_vcpu *vcpu) > > vmx->nested.apic_access_page = NULL; > > } > > kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); > > - if (vmx->nested.pi_desc_page) { > > - kunmap(vmx->nested.pi_desc_page); > > - kvm_release_page_dirty(vmx->nested.pi_desc_page); > > - vmx->nested.pi_desc_page = NULL; > > - vmx->nested.pi_desc = NULL; > > - } > > + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); > > + vmx->nested.pi_desc = NULL; > > > > kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); > > > > @@ -2868,26 +2864,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) > > } > > > > if (nested_cpu_has_posted_intr(vmcs12)) { > > - if (vmx->nested.pi_desc_page) { /* shouldn't happen */ > > - kunmap(vmx->nested.pi_desc_page); > > - kvm_release_page_dirty(vmx->nested.pi_desc_page); 
> > - vmx->nested.pi_desc_page = NULL;
> > - vmx->nested.pi_desc = NULL;
> > - vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull);
> > + map = &vmx->nested.pi_desc_map;
> > +
> > + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
> > + vmx->nested.pi_desc =
> > + (struct pi_desc *)(((void *)map->hva) +
> > + offset_in_page(vmcs12->posted_intr_desc_addr));
> > + vmcs_write64(POSTED_INTR_DESC_ADDR,
> > + pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
> > }
>
> Previously, if there was no backing page for the
> vmcs12->posted_intr_desc_addr, we wrote an illegal value (-1ull) into
> the vmcs02 POSTED_INTR_DESC_ADDR field to force VM-entry failure.

The "vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull)" above is for the "impossible" case where the PI descriptor was already mapped. The error handling for failure to map is below. The (forced) VM-Exit unmap paths don't stuff vmcs02 either. In other words, I think the bug was pre-existing.

> Now, AFAICT, we leave that field unmodified. For a newly constructed vmcs02,
> doesn't that mean we're going to treat physical address 0 as the address of
> the vmcs02 posted interrupt descriptor?

PA=0 is the happy path. Thanks to L1TF, that memory is always unused. If mapping for a previous VM-Enter succeeded, vmcs02.POSTED_INTR_DESC_ADDR will hold whatever PA was used for the last VM-Enter.

> > - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
> > - if (is_error_page(page))
> > - return;

Error path for failure to map.
> > - vmx->nested.pi_desc_page = page; > > - vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); > > - vmx->nested.pi_desc = > > - (struct pi_desc *)((void *)vmx->nested.pi_desc + > > - (unsigned long)(vmcs12->posted_intr_desc_addr & > > - (PAGE_SIZE - 1))); > > - vmcs_write64(POSTED_INTR_DESC_ADDR, > > - page_to_phys(vmx->nested.pi_desc_page) + > > - (unsigned long)(vmcs12->posted_intr_desc_addr & > > - (PAGE_SIZE - 1))); > > } > > if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) > > vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, > > @@ -3911,12 +3896,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, > > vmx->nested.apic_access_page = NULL; > > } > > kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); > > - if (vmx->nested.pi_desc_page) { > > - kunmap(vmx->nested.pi_desc_page); > > - kvm_release_page_dirty(vmx->nested.pi_desc_page); > > - vmx->nested.pi_desc_page = NULL; > > - vmx->nested.pi_desc = NULL; > > - } > > + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); > > + vmx->nested.pi_desc = NULL; > > > > /* > > * We are now running in L2, mmu_notifier will force to reload the > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > > index f618f52..bd04725 100644 > > --- a/arch/x86/kvm/vmx/vmx.h > > +++ b/arch/x86/kvm/vmx/vmx.h > > @@ -143,7 +143,7 @@ struct nested_vmx { > > */ > > struct page *apic_access_page; > > struct kvm_host_map virtual_apic_map; > > - struct page *pi_desc_page; > > + struct kvm_host_map pi_desc_map; > > > > struct kvm_host_map msr_bitmap_map; > > > > -- > > 2.7.4 > >
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 31b352c..53b1063 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -230,12 +230,8 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); - if (vmx->nested.pi_desc_page) { - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); + vmx->nested.pi_desc = NULL; kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); @@ -2868,26 +2864,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } if (nested_cpu_has_posted_intr(vmcs12)) { - if (vmx->nested.pi_desc_page) { /* shouldn't happen */ - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); + map = &vmx->nested.pi_desc_map; + + if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) { + vmx->nested.pi_desc = + (struct pi_desc *)(((void *)map->hva) + + offset_in_page(vmcs12->posted_intr_desc_addr)); + vmcs_write64(POSTED_INTR_DESC_ADDR, + pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr)); } - page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); - if (is_error_page(page)) - return; - vmx->nested.pi_desc_page = page; - vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); - vmx->nested.pi_desc = - (struct pi_desc *)((void *)vmx->nested.pi_desc + - (unsigned long)(vmcs12->posted_intr_desc_addr & - (PAGE_SIZE - 1))); - vmcs_write64(POSTED_INTR_DESC_ADDR, - page_to_phys(vmx->nested.pi_desc_page) + - (unsigned long)(vmcs12->posted_intr_desc_addr & - (PAGE_SIZE - 1))); } if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, @@ -3911,12 
+3896,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.apic_access_page = NULL; } kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); - if (vmx->nested.pi_desc_page) { - kunmap(vmx->nested.pi_desc_page); - kvm_release_page_dirty(vmx->nested.pi_desc_page); - vmx->nested.pi_desc_page = NULL; - vmx->nested.pi_desc = NULL; - } + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); + vmx->nested.pi_desc = NULL; /* * We are now running in L2, mmu_notifier will force to reload the diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f618f52..bd04725 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -143,7 +143,7 @@ struct nested_vmx { */ struct page *apic_access_page; struct kvm_host_map virtual_apic_map; - struct page *pi_desc_page; + struct kvm_host_map pi_desc_map; struct kvm_host_map msr_bitmap_map;