diff mbox series

[v6,08/14] KVM/nVMX: Use kvm_vcpu_map when mapping the posted interrupt descriptor table

Message ID 1548966284-28642-9-git-send-email-karahmed@amazon.de (mailing list archive)
State New, archived
Headers show
Series KVM/X86: Introduce a new guest mapping interface | expand

Commit Message

KarimAllah Ahmed Jan. 31, 2019, 8:24 p.m. UTC
Use kvm_vcpu_map when mapping the posted interrupt descriptor table since
using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory
that has a "struct page".

One additional semantic change is that the virtual host mapping lifecycle
has changed a bit. It now has the same lifetime as the pinning of the
interrupt descriptor table page on the host side.

Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
v4 -> v5:
- unmap with dirty flag

v1 -> v2:
- Do not change the lifecycle of the mapping (pbonzini)
---
 arch/x86/kvm/vmx/nested.c | 43 ++++++++++++-------------------------------
 arch/x86/kvm/vmx/vmx.h    |  2 +-
 2 files changed, 13 insertions(+), 32 deletions(-)

Comments

Jim Mattson May 6, 2021, 11:06 p.m. UTC | #1
On Thu, Jan 31, 2019 at 12:28 PM KarimAllah Ahmed <karahmed@amazon.de> wrote:
>
> Use kvm_vcpu_map when mapping the posted interrupt descriptor table since
> using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory
> that has a "struct page".
>
> One additional semantic change is that the virtual host mapping lifecycle
> has changed a bit. It now has the same lifetime of the pinning of the
> interrupt descriptor table page on the host side.
>
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
> v4 -> v5:
> - unmap with dirty flag
>
> v1 -> v2:
> - Do not change the lifecycle of the mapping (pbonzini)
> ---
>  arch/x86/kvm/vmx/nested.c | 43 ++++++++++++-------------------------------
>  arch/x86/kvm/vmx/vmx.h    |  2 +-
>  2 files changed, 13 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 31b352c..53b1063 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -230,12 +230,8 @@ static void free_nested(struct kvm_vcpu *vcpu)
>                 vmx->nested.apic_access_page = NULL;
>         }
>         kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
> -       if (vmx->nested.pi_desc_page) {
> -               kunmap(vmx->nested.pi_desc_page);
> -               kvm_release_page_dirty(vmx->nested.pi_desc_page);
> -               vmx->nested.pi_desc_page = NULL;
> -               vmx->nested.pi_desc = NULL;
> -       }
> +       kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
> +       vmx->nested.pi_desc = NULL;
>
>         kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
>
> @@ -2868,26 +2864,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
>         }
>
>         if (nested_cpu_has_posted_intr(vmcs12)) {
> -               if (vmx->nested.pi_desc_page) { /* shouldn't happen */
> -                       kunmap(vmx->nested.pi_desc_page);
> -                       kvm_release_page_dirty(vmx->nested.pi_desc_page);
> -                       vmx->nested.pi_desc_page = NULL;
> -                       vmx->nested.pi_desc = NULL;
> -                       vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull);
> +               map = &vmx->nested.pi_desc_map;
> +
> +               if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
> +                       vmx->nested.pi_desc =
> +                               (struct pi_desc *)(((void *)map->hva) +
> +                               offset_in_page(vmcs12->posted_intr_desc_addr));
> +                       vmcs_write64(POSTED_INTR_DESC_ADDR,
> +                                    pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
>                 }

Previously, if there was no backing page for the
vmcs12->posted_intr_desc_addr, we wrote an illegal value (-1ull) into
the vmcs02 POSTED_INTR_DESC_ADDR field to force VM-entry failure. Now,
AFAICT, we leave that field unmodified. For a newly constructed
vmcs02, doesn't that mean we're going to treat physical address 0 as
the address of the vmcs02 posted interrupt descriptor?

> -               page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
> -               if (is_error_page(page))
> -                       return;
> -               vmx->nested.pi_desc_page = page;
> -               vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
> -               vmx->nested.pi_desc =
> -                       (struct pi_desc *)((void *)vmx->nested.pi_desc +
> -                       (unsigned long)(vmcs12->posted_intr_desc_addr &
> -                       (PAGE_SIZE - 1)));
> -               vmcs_write64(POSTED_INTR_DESC_ADDR,
> -                       page_to_phys(vmx->nested.pi_desc_page) +
> -                       (unsigned long)(vmcs12->posted_intr_desc_addr &
> -                       (PAGE_SIZE - 1)));
>         }
>         if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
>                 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
> @@ -3911,12 +3896,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
>                 vmx->nested.apic_access_page = NULL;
>         }
>         kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
> -       if (vmx->nested.pi_desc_page) {
> -               kunmap(vmx->nested.pi_desc_page);
> -               kvm_release_page_dirty(vmx->nested.pi_desc_page);
> -               vmx->nested.pi_desc_page = NULL;
> -               vmx->nested.pi_desc = NULL;
> -       }
> +       kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
> +       vmx->nested.pi_desc = NULL;
>
>         /*
>          * We are now running in L2, mmu_notifier will force to reload the
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index f618f52..bd04725 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -143,7 +143,7 @@ struct nested_vmx {
>          */
>         struct page *apic_access_page;
>         struct kvm_host_map virtual_apic_map;
> -       struct page *pi_desc_page;
> +       struct kvm_host_map pi_desc_map;
>
>         struct kvm_host_map msr_bitmap_map;
>
> --
> 2.7.4
>
Sean Christopherson May 6, 2021, 11:27 p.m. UTC | #2
On Thu, May 06, 2021, Jim Mattson wrote:
> On Thu, Jan 31, 2019 at 12:28 PM KarimAllah Ahmed <karahmed@amazon.de> wrote:
> >
> > Use kvm_vcpu_map when mapping the posted interrupt descriptor table since
> > using kvm_vcpu_gpa_to_page() and kmap() will only work for guest memory
> > that has a "struct page".
> >
> > One additional semantic change is that the virtual host mapping lifecycle
> > has changed a bit. It now has the same lifetime of the pinning of the
> > interrupt descriptor table page on the host side.
> >
> > Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> > Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > ---
> > v4 -> v5:
> > - unmap with dirty flag
> >
> > v1 -> v2:
> > - Do not change the lifecycle of the mapping (pbonzini)
> > ---
> >  arch/x86/kvm/vmx/nested.c | 43 ++++++++++++-------------------------------
> >  arch/x86/kvm/vmx/vmx.h    |  2 +-
> >  2 files changed, 13 insertions(+), 32 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index 31b352c..53b1063 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -230,12 +230,8 @@ static void free_nested(struct kvm_vcpu *vcpu)
> >                 vmx->nested.apic_access_page = NULL;
> >         }
> >         kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
> > -       if (vmx->nested.pi_desc_page) {
> > -               kunmap(vmx->nested.pi_desc_page);
> > -               kvm_release_page_dirty(vmx->nested.pi_desc_page);
> > -               vmx->nested.pi_desc_page = NULL;
> > -               vmx->nested.pi_desc = NULL;
> > -       }
> > +       kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
> > +       vmx->nested.pi_desc = NULL;
> >
> >         kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
> >
> > @@ -2868,26 +2864,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
> >         }
> >
> >         if (nested_cpu_has_posted_intr(vmcs12)) {
> > -               if (vmx->nested.pi_desc_page) { /* shouldn't happen */
> > -                       kunmap(vmx->nested.pi_desc_page);
> > -                       kvm_release_page_dirty(vmx->nested.pi_desc_page);
> > -                       vmx->nested.pi_desc_page = NULL;
> > -                       vmx->nested.pi_desc = NULL;
> > -                       vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull);
> > +               map = &vmx->nested.pi_desc_map;
> > +
> > +               if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
> > +                       vmx->nested.pi_desc =
> > +                               (struct pi_desc *)(((void *)map->hva) +
> > +                               offset_in_page(vmcs12->posted_intr_desc_addr));
> > +                       vmcs_write64(POSTED_INTR_DESC_ADDR,
> > +                                    pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
> >                 }
> 
> Previously, if there was no backing page for the
> vmcs12->posted_intr_desc_addr, we wrote an illegal value (-1ull) into
> the vmcs02 POSTED_INTR_DESC_ADDR field to force VM-entry failure.

The "vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull)" above is for the "impossible"
case where the PI descriptor was already mapped.  The error handling for failure
to map is below.  The (forced) VM-Exit unmap paths don't stuff vmcs02 either.
In other words, I think the bug was pre-existing.

> Now, AFAICT, we leave that field unmodified. For a newly constructed vmcs02,
> doesn't that mean we're going to treat physical address 0 as the address of
> the vmcs02 posted interrupt descriptor?

PA=0 is the happy path.  Thanks to L1TF, that memory is always unused.  If
mapping for a previous VM-Enter succeeded, vmcs02.POSTED_INTR_DESC_ADDR will
hold whatever PA was used for the last VM-Enter.
 
> > -               page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
> > -               if (is_error_page(page))
> > -                       return;

Error path for failure to map.

> > -               vmx->nested.pi_desc_page = page;
> > -               vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
> > -               vmx->nested.pi_desc =
> > -                       (struct pi_desc *)((void *)vmx->nested.pi_desc +
> > -                       (unsigned long)(vmcs12->posted_intr_desc_addr &
> > -                       (PAGE_SIZE - 1)));
> > -               vmcs_write64(POSTED_INTR_DESC_ADDR,
> > -                       page_to_phys(vmx->nested.pi_desc_page) +
> > -                       (unsigned long)(vmcs12->posted_intr_desc_addr &
> > -                       (PAGE_SIZE - 1)));
> >         }
> >         if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
> >                 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
> > @@ -3911,12 +3896,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
> >                 vmx->nested.apic_access_page = NULL;
> >         }
> >         kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
> > -       if (vmx->nested.pi_desc_page) {
> > -               kunmap(vmx->nested.pi_desc_page);
> > -               kvm_release_page_dirty(vmx->nested.pi_desc_page);
> > -               vmx->nested.pi_desc_page = NULL;
> > -               vmx->nested.pi_desc = NULL;
> > -       }
> > +       kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
> > +       vmx->nested.pi_desc = NULL;
> >
> >         /*
> >          * We are now running in L2, mmu_notifier will force to reload the
> > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > index f618f52..bd04725 100644
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -143,7 +143,7 @@ struct nested_vmx {
> >          */
> >         struct page *apic_access_page;
> >         struct kvm_host_map virtual_apic_map;
> > -       struct page *pi_desc_page;
> > +       struct kvm_host_map pi_desc_map;
> >
> >         struct kvm_host_map msr_bitmap_map;
> >
> > --
> > 2.7.4
> >
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 31b352c..53b1063 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -230,12 +230,8 @@  static void free_nested(struct kvm_vcpu *vcpu)
 		vmx->nested.apic_access_page = NULL;
 	}
 	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
-	if (vmx->nested.pi_desc_page) {
-		kunmap(vmx->nested.pi_desc_page);
-		kvm_release_page_dirty(vmx->nested.pi_desc_page);
-		vmx->nested.pi_desc_page = NULL;
-		vmx->nested.pi_desc = NULL;
-	}
+	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+	vmx->nested.pi_desc = NULL;
 
 	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
 
@@ -2868,26 +2864,15 @@  static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	}
 
 	if (nested_cpu_has_posted_intr(vmcs12)) {
-		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
-			kunmap(vmx->nested.pi_desc_page);
-			kvm_release_page_dirty(vmx->nested.pi_desc_page);
-			vmx->nested.pi_desc_page = NULL;
-			vmx->nested.pi_desc = NULL;
-			vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull);
+		map = &vmx->nested.pi_desc_map;
+
+		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
+			vmx->nested.pi_desc =
+				(struct pi_desc *)(((void *)map->hva) +
+				offset_in_page(vmcs12->posted_intr_desc_addr));
+			vmcs_write64(POSTED_INTR_DESC_ADDR,
+				     pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
 		}
-		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
-		if (is_error_page(page))
-			return;
-		vmx->nested.pi_desc_page = page;
-		vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
-		vmx->nested.pi_desc =
-			(struct pi_desc *)((void *)vmx->nested.pi_desc +
-			(unsigned long)(vmcs12->posted_intr_desc_addr &
-			(PAGE_SIZE - 1)));
-		vmcs_write64(POSTED_INTR_DESC_ADDR,
-			page_to_phys(vmx->nested.pi_desc_page) +
-			(unsigned long)(vmcs12->posted_intr_desc_addr &
-			(PAGE_SIZE - 1)));
 	}
 	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
 		vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
@@ -3911,12 +3896,8 @@  void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		vmx->nested.apic_access_page = NULL;
 	}
 	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
-	if (vmx->nested.pi_desc_page) {
-		kunmap(vmx->nested.pi_desc_page);
-		kvm_release_page_dirty(vmx->nested.pi_desc_page);
-		vmx->nested.pi_desc_page = NULL;
-		vmx->nested.pi_desc = NULL;
-	}
+	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+	vmx->nested.pi_desc = NULL;
 
 	/*
 	 * We are now running in L2, mmu_notifier will force to reload the
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index f618f52..bd04725 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -143,7 +143,7 @@  struct nested_vmx {
 	 */
 	struct page *apic_access_page;
 	struct kvm_host_map virtual_apic_map;
-	struct page *pi_desc_page;
+	struct kvm_host_map pi_desc_map;
 
 	struct kvm_host_map msr_bitmap_map;