
[1/4] ARM: KVM: be more thorough when invalidating TLBs

Message ID 1367331446-28030-2-git-send-email-marc.zyngier@arm.com (mailing list archive)
State New, archived

Commit Message

Marc Zyngier April 30, 2013, 2:17 p.m. UTC
The KVM/ARM MMU code doesn't take care of invalidating TLBs before
freeing a {pte,pmd} table. This could cause problems if the page
is reallocated and then speculated into by another CPU.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/interrupts.S |  2 ++
 arch/arm/kvm/mmu.c        | 36 +++++++++++++++++++++---------------
 2 files changed, 23 insertions(+), 15 deletions(-)
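
The fix boils down to a strict ordering: clear the descriptor, invalidate
the TLB, and only then free the table page, so that neither a stale TLB
entry nor a speculative walk through the old descriptor can still reach
the page once it has been reallocated. clear_pmd_entry from the patch
below shows the shape (numbered comments added here for illustration):

static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	pmd_clear(pmd);				/* 1. unhook the pte table */
	kvm_tlb_flush_vmid_ipa(kvm, addr);	/* 2. invalidate before freeing */
	pte_free_kernel(NULL, pte_table);	/* 3. page may now be reused */
	put_page(virt_to_page(pmd));
}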

Comments

Christoffer Dall April 30, 2013, 5:17 p.m. UTC | #1
On Tue, Apr 30, 2013 at 7:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> The KVM/ARM MMU code doesn't take care of invalidating TLBs before
> freeing a {pte,pmd} table. This could cause problems if the page
> is reallocated and then speculated into by another CPU.
>
> Reported-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/kvm/interrupts.S |  2 ++
>  arch/arm/kvm/mmu.c        | 36 +++++++++++++++++++++---------------
>  2 files changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index f7793df..9e2d906c 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -37,6 +37,8 @@ __kvm_hyp_code_start:
>   *
>   * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
>   *
> + * Invalidate TLB for the given IPA, or invalidate all if ipa is zero.
> + *
>   * We rely on the hardware to broadcast the TLB invalidation to all CPUs
>   * inside the inner-shareable domain (which is the case for all v7
>   * implementations).  If we come across a non-IS SMP implementation, we'll
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 9657065..71f2a57 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -43,7 +43,8 @@ static phys_addr_t hyp_idmap_vector;
>
>  static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
>  {
> -       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
> +       if (kvm)
> +               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);

this feels slightly abusive? could we add a comment that hyp page
table freeing doesn't need tlb flushing from these functions and
doesn't have a struct kvm?

alternatively it may be clearer to have something like:

if (kvm) /* Check if Hyp or Stage-2 page table */
    kvm_tlb_flush_vmid_ipa(kvm, addr);

in the callers, but, eh, maybe I'm overthinking this.

>  }
>
>  static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
> @@ -78,18 +79,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
>         return p;
>  }
>
> -static void clear_pud_entry(pud_t *pud)
> +static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
>  {
>         pmd_t *pmd_table = pmd_offset(pud, 0);
>         pud_clear(pud);
> +       kvm_tlb_flush_vmid_ipa(kvm, addr);
>         pmd_free(NULL, pmd_table);
>         put_page(virt_to_page(pud));
>  }
>
> -static void clear_pmd_entry(pmd_t *pmd)
> +static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
>  {
>         pte_t *pte_table = pte_offset_kernel(pmd, 0);
>         pmd_clear(pmd);
> +       kvm_tlb_flush_vmid_ipa(kvm, addr);
>         pte_free_kernel(NULL, pte_table);
>         put_page(virt_to_page(pmd));
>  }
> @@ -100,11 +103,12 @@ static bool pmd_empty(pmd_t *pmd)
>         return page_count(pmd_page) == 1;
>  }
>
> -static void clear_pte_entry(pte_t *pte)
> +static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
>  {
>         if (pte_present(*pte)) {
>                 kvm_set_pte(pte, __pte(0));
>                 put_page(virt_to_page(pte));
> +               kvm_tlb_flush_vmid_ipa(kvm, addr);

I enjoyed the fact that it's perfectly correct to have this flush
after the put_page: the put_page only drops a reference on the pte
table page, which isn't actually freed until clear_pmd_entry runs,
itself after a flush.

>         }
>  }
>
> [...]
>
looks good to me,

-Christoffer
Marc Zyngier April 30, 2013, 6:02 p.m. UTC | #2
On 30/04/13 18:17, Christoffer Dall wrote:
> On Tue, Apr 30, 2013 at 7:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> [...]
>> @@ -43,7 +43,8 @@ static phys_addr_t hyp_idmap_vector;
>>
>>  static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
>>  {
>> -       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
>> +       if (kvm)
>> +               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
> 
> this feels slightly abusive? could we add a comment that hyp page
> table freeing doesn't need tlb flushing from these functions and
> doesn't have a struct kvm?
> 
> alternatively it may be clearer to have something like:
> 
> if (kvm) /* Check if Hyp or Stage-2 page table */
>     kvm_tlb_flush_vmid_ipa(kvm, addr);
> 
> in the callers, but, eh, maybe I'm overthinking this.

That code would be repeated three times, which feels a bit overkill.
I'll add a comment to the function so it is crystal clear.

Thanks,

	M.
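
For reference, the guarded helper with such a comment might read along
these lines (the comment wording here is hypothetical; the respun patch
is not part of this thread):

/*
 * Illustrative wording: Hyp page tables are torn down with kvm == NULL.
 * They have no VMID, so they are not covered by the Stage-2 TLB entries
 * this helper invalidates, and there is nothing to flush in that case.
 */
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	if (kvm)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}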

Patch

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f7793df..9e2d906c 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -37,6 +37,8 @@ __kvm_hyp_code_start:
  *
  * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
  *
+ * Invalidate TLB for the given IPA, or invalidate all if ipa is zero.
+ *
  * We rely on the hardware to broadcast the TLB invalidation to all CPUs
  * inside the inner-shareable domain (which is the case for all v7
  * implementations).  If we come across a non-IS SMP implementation, we'll
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 9657065..71f2a57 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -43,7 +43,8 @@ static phys_addr_t hyp_idmap_vector;
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+	if (kvm)
+		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -78,18 +79,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static void clear_pud_entry(pud_t *pud)
+static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
 	pud_clear(pud);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pmd_free(NULL, pmd_table);
 	put_page(virt_to_page(pud));
 }
 
-static void clear_pmd_entry(pmd_t *pmd)
+static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
 	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
@@ -100,11 +103,12 @@ static bool pmd_empty(pmd_t *pmd)
 	return page_count(pmd_page) == 1;
 }
 
-static void clear_pte_entry(pte_t *pte)
+static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 {
 	if (pte_present(*pte)) {
 		kvm_set_pte(pte, __pte(0));
 		put_page(virt_to_page(pte));
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	}
 }
 
@@ -114,7 +118,8 @@ static bool pte_empty(pte_t *pte)
 	return page_count(pte_page) == 1;
 }
 
-static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+			unsigned long long start, u64 size)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -138,15 +143,15 @@ static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
 		}
 
 		pte = pte_offset_kernel(pmd, addr);
-		clear_pte_entry(pte);
+		clear_pte_entry(kvm, pte, addr);
 		range = PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
 		if (pte_empty(pte)) {
-			clear_pmd_entry(pmd);
+			clear_pmd_entry(kvm, pmd, addr);
 			range = PMD_SIZE;
 			if (pmd_empty(pmd)) {
-				clear_pud_entry(pud);
+				clear_pud_entry(kvm, pud, addr);
 				range = PUD_SIZE;
 			}
 		}
@@ -165,14 +170,14 @@ void free_boot_hyp_pgd(void)
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
-		unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 		kfree(boot_hyp_pgd);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
-		unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
 	kfree(init_bounce_page);
 	init_bounce_page = NULL;
@@ -200,9 +205,10 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+
 		kfree(hyp_pgd);
 		hyp_pgd = NULL;
 	}
@@ -393,7 +399,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
  */
 static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 {
-	unmap_range(kvm->arch.pgd, start, size);
+	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
 /**
@@ -413,6 +419,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+	kvm_tlb_flush_vmid_ipa(kvm, 0);	/* Invalidate TLB ALL */
 	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 	kvm->arch.pgd = NULL;
 }
@@ -675,7 +682,6 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
-	kvm_tlb_flush_vmid_ipa(kvm, gpa);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)