diff mbox series

[08/26] KVM: arm64: Use TTL hint in when invalidating stage-2 translations

Message ID 20200422120050.3693593-9-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Preliminary NV patches | expand

Commit Message

Marc Zyngier April 22, 2020, noon UTC
Since we always have a precide idea of the level we're dealing with
when invalidating TLBs, we can provide it to as a hint to our
invalidation helper.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/stage2_pgtable.h |  9 +++++++++
 virt/kvm/arm/mmu.c                      | 27 +++++++++++++------------
 2 files changed, 23 insertions(+), 13 deletions(-)

Comments

Andrew Scull May 7, 2020, 3:13 p.m. UTC | #1
> @@ -176,7 +177,7 @@ static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr
>  	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
>  	VM_BUG_ON(stage2_pud_huge(kvm, *pud));
>  	stage2_pud_clear(kvm, pud);
> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
>  	stage2_pmd_free(kvm, pmd_table);
>  	put_page(virt_to_page(pud));
>  }
> @@ -186,7 +187,7 @@ static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr
>  	pte_t *pte_table = pte_offset_kernel(pmd, 0);
>  	VM_BUG_ON(pmd_thp_or_huge(*pmd));
>  	pmd_clear(pmd);
> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
>  	free_page((unsigned long)pte_table);
>  	put_page(virt_to_page(pmd));
>  }

Going by the names, is it possible to give a better level hint for these
cases?
James Morse May 12, 2020, 12:04 p.m. UTC | #2
Hi Andrew,

On 07/05/2020 16:13, Andrew Scull wrote:
>> @@ -176,7 +177,7 @@ static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr
>>  	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
>>  	VM_BUG_ON(stage2_pud_huge(kvm, *pud));
>>  	stage2_pud_clear(kvm, pud);
>> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
>> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
>>  	stage2_pmd_free(kvm, pmd_table);
>>  	put_page(virt_to_page(pud));
>>  }
>> @@ -186,7 +187,7 @@ static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr
>>  	pte_t *pte_table = pte_offset_kernel(pmd, 0);
>>  	VM_BUG_ON(pmd_thp_or_huge(*pmd));
>>  	pmd_clear(pmd);
>> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
>> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
>>  	free_page((unsigned long)pte_table);
>>  	put_page(virt_to_page(pmd));
>>  }
> 
> Going by the names, is it possible to give a better level hint for these
> cases?

There is no leaf entry being invalidated here. After clearing the range, we found we'd
emptied (and invalidated) a whole page of mappings:
|	if (stage2_pmd_table_empty(kvm, start_pmd))
|		clear_stage2_pud_entry(mmu, pud, start_addr);

Now we want to remove the link to the empty page so we can free it. We are changing the
structure of the tables, not what gets mapped.

I think this is why we need the un-hinted behaviour, to invalidate "any level of the
translation table walk required to translate the specified IPA". Otherwise the hardware
can look for a leaf at the indicated level, find none, and do nothing.


This is sufficiently horrible, its possible I've got it completely wrong! (does it make
sense?)


Thanks,

James
James Morse May 12, 2020, 5:26 p.m. UTC | #3
Hi Marc,

On 22/04/2020 13:00, Marc Zyngier wrote:
> Since we always have a precide idea of the level we're dealing with

(precise)

> when invalidating TLBs, we can provide it to as a hint to our
> invalidation helper.

> diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
> index 326aac658b9da..7ed5c1a769a9b 100644
> --- a/arch/arm64/include/asm/stage2_pgtable.h
> +++ b/arch/arm64/include/asm/stage2_pgtable.h
> @@ -230,4 +230,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
>  	return (boundary - 1 < end - 1) ? boundary : end;
>  }
>  
> +/*
> + * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
> + * the architectural page-table level.
> + */
> +#define S2_NO_LEVEL_HINT	0
> +#define S2_PUD_LEVEL		1
> +#define S2_PMD_LEVEL		2
> +#define S2_PTE_LEVEL		3

Are these really just for stage2, would the stage1 definition be the same?

~

Digging into the VTCR_EL2.SL0 trickery, it does everything at pgd where there are no block
mappings, and no hints, so it looks fine.

Reviewed-by: James Morse <james.morse@arm.com>


Thanks,

James
Andrew Scull May 13, 2020, 9:06 a.m. UTC | #4
On Tue, May 12, 2020 at 01:04:31PM +0100, James Morse wrote:
> Hi Andrew,
> 
> On 07/05/2020 16:13, Andrew Scull wrote:
> >> @@ -176,7 +177,7 @@ static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr
> >>  	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
> >>  	VM_BUG_ON(stage2_pud_huge(kvm, *pud));
> >>  	stage2_pud_clear(kvm, pud);
> >> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
> >> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
> >>  	stage2_pmd_free(kvm, pmd_table);
> >>  	put_page(virt_to_page(pud));
> >>  }
> >> @@ -186,7 +187,7 @@ static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr
> >>  	pte_t *pte_table = pte_offset_kernel(pmd, 0);
> >>  	VM_BUG_ON(pmd_thp_or_huge(*pmd));
> >>  	pmd_clear(pmd);
> >> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
> >> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
> >>  	free_page((unsigned long)pte_table);
> >>  	put_page(virt_to_page(pmd));
> >>  }
> > 
> > Going by the names, is it possible to give a better level hint for these
> > cases?
> 
> There is no leaf entry being invalidated here. After clearing the range, we found we'd
> emptied (and invalidated) a whole page of mappings:
> |	if (stage2_pmd_table_empty(kvm, start_pmd))
> |		clear_stage2_pud_entry(mmu, pud, start_addr);
> 
> Now we want to remove the link to the empty page so we can free it. We are changing the
> structure of the tables, not what gets mapped.
> 
> I think this is why we need the un-hinted behaviour, to invalidate "any level of the
> translation table walk required to translate the specified IPA". Otherwise the hardware
> can look for a leaf at the indicated level, find none, and do nothing.
> 
> 
> This is sufficiently horrible, its possible I've got it completely wrong! (does it make
> sense?)

Ok. `addr` is an IPA, that IPA is now omitted from the map so doesn't
appear in any entry of the table, least of all a leaf entry. That makes
sense.

Is there a convention to distinguish IPA and PA similar to the
distinction for VA or does kvmarm just use phys_addr_t all round?

It seems like the TTL patches are failry self contained if it would be
easier to serparate them out from these larger series?
Marc Zyngier May 27, 2020, 8:59 a.m. UTC | #5
On 2020-05-13 10:06, Andrew Scull wrote:
> On Tue, May 12, 2020 at 01:04:31PM +0100, James Morse wrote:
>> Hi Andrew,
>> 
>> On 07/05/2020 16:13, Andrew Scull wrote:
>> >> @@ -176,7 +177,7 @@ static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr
>> >>  	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
>> >>  	VM_BUG_ON(stage2_pud_huge(kvm, *pud));
>> >>  	stage2_pud_clear(kvm, pud);
>> >> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
>> >> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
>> >>  	stage2_pmd_free(kvm, pmd_table);
>> >>  	put_page(virt_to_page(pud));
>> >>  }
>> >> @@ -186,7 +187,7 @@ static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr
>> >>  	pte_t *pte_table = pte_offset_kernel(pmd, 0);
>> >>  	VM_BUG_ON(pmd_thp_or_huge(*pmd));
>> >>  	pmd_clear(pmd);
>> >> -	kvm_tlb_flush_vmid_ipa(mmu, addr);
>> >> +	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
>> >>  	free_page((unsigned long)pte_table);
>> >>  	put_page(virt_to_page(pmd));
>> >>  }
>> >
>> > Going by the names, is it possible to give a better level hint for these
>> > cases?
>> 
>> There is no leaf entry being invalidated here. After clearing the 
>> range, we found we'd
>> emptied (and invalidated) a whole page of mappings:
>> |	if (stage2_pmd_table_empty(kvm, start_pmd))
>> |		clear_stage2_pud_entry(mmu, pud, start_addr);
>> 
>> Now we want to remove the link to the empty page so we can free it. We 
>> are changing the
>> structure of the tables, not what gets mapped.
>> 
>> I think this is why we need the un-hinted behaviour, to invalidate 
>> "any level of the
>> translation table walk required to translate the specified IPA". 
>> Otherwise the hardware
>> can look for a leaf at the indicated level, find none, and do nothing.
>> 
>> 
>> This is sufficiently horrible, its possible I've got it completely 
>> wrong! (does it make
>> sense?)
> 
> Ok. `addr` is an IPA, that IPA is now omitted from the map so doesn't
> appear in any entry of the table, least of all a leaf entry. That makes
> sense.
> 
> Is there a convention to distinguish IPA and PA similar to the
> distinction for VA or does kvmarm just use phys_addr_t all round?
> 
> It seems like the TTL patches are failry self contained if it would be
> easier to serparate them out from these larger series?

They are. This whole series is a mix of unrelated patches anyway.
Their only goal is to make my life a bit easier in the distant
future.

I'll repost that anyway, as I have made some cosmetic changes.

Thanks,

         M.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 326aac658b9da..7ed5c1a769a9b 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -230,4 +230,13 @@  stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
 
+/*
+ * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
+ * the architectural page-table level.
+ */
+#define S2_NO_LEVEL_HINT	0
+#define S2_PUD_LEVEL		1
+#define S2_PMD_LEVEL		2
+#define S2_PTE_LEVEL		3
+
 #endif	/* __ARM64_S2_PGTABLE_H_ */
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 611234e0aef12..2c132f5595c4b 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -58,9 +58,10 @@  void kvm_flush_remote_tlbs(struct kvm *kvm)
 	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
 }
 
-static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
+static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+				   int level)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, 0);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level);
 }
 
 /*
@@ -102,7 +103,7 @@  static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t
 		return;
 
 	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(mmu, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
 	put_page(virt_to_page(pmd));
 }
 
@@ -122,7 +123,7 @@  static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t
 		return;
 
 	stage2_pud_clear(kvm, pudp);
-	kvm_tlb_flush_vmid_ipa(mmu, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
 	put_page(virt_to_page(pudp));
 }
 
@@ -164,7 +165,7 @@  static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr
 
 	pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL);
 	stage2_pgd_clear(kvm, pgd);
-	kvm_tlb_flush_vmid_ipa(mmu, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
 	stage2_pud_free(kvm, pud_table);
 	put_page(virt_to_page(pgd));
 }
@@ -176,7 +177,7 @@  static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr
 	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
 	VM_BUG_ON(stage2_pud_huge(kvm, *pud));
 	stage2_pud_clear(kvm, pud);
-	kvm_tlb_flush_vmid_ipa(mmu, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
 	stage2_pmd_free(kvm, pmd_table);
 	put_page(virt_to_page(pud));
 }
@@ -186,7 +187,7 @@  static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
 	VM_BUG_ON(pmd_thp_or_huge(*pmd));
 	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(mmu, addr);
+	kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
 	free_page((unsigned long)pte_table);
 	put_page(virt_to_page(pmd));
 }
@@ -256,7 +257,7 @@  static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
 			pte_t old_pte = *pte;
 
 			kvm_set_pte(pte, __pte(0));
-			kvm_tlb_flush_vmid_ipa(mmu, addr);
+			kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
 
 			/* No need to invalidate the cache for device mappings */
 			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@@ -285,7 +286,7 @@  static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
 				pmd_t old_pmd = *pmd;
 
 				pmd_clear(pmd);
-				kvm_tlb_flush_vmid_ipa(mmu, addr);
+				kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
 
 				kvm_flush_dcache_pmd(old_pmd);
 
@@ -315,7 +316,7 @@  static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
 				pud_t old_pud = *pud;
 
 				stage2_pud_clear(kvm, pud);
-				kvm_tlb_flush_vmid_ipa(mmu, addr);
+				kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
 				kvm_flush_dcache_pud(old_pud);
 				put_page(virt_to_page(pud));
 			} else {
@@ -1129,7 +1130,7 @@  static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
 		 */
 		WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
 		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(mmu, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
 	} else {
 		get_page(virt_to_page(pmd));
 	}
@@ -1171,7 +1172,7 @@  static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu,
 
 		WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
 		stage2_pud_clear(kvm, pudp);
-		kvm_tlb_flush_vmid_ipa(mmu, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
 	} else {
 		get_page(virt_to_page(pudp));
 	}
@@ -1320,7 +1321,7 @@  static int stage2_set_pte(struct kvm_s2_mmu *mmu,
 			return 0;
 
 		kvm_set_pte(pte, __pte(0));
-		kvm_tlb_flush_vmid_ipa(mmu, addr);
+		kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
 	} else {
 		get_page(virt_to_page(pte));
 	}