@@ -132,4 +132,6 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
int kvm_init_nv_sysregs(struct kvm *kvm);
+#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0)
+
#endif /* __ARM64_KVM_NESTED_H */
@@ -4,6 +4,7 @@
* Author: Jintack Lim <jintack.lim@linaro.org>
*/
+#include <linux/bitfield.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
@@ -407,6 +408,81 @@ static unsigned int ttl_to_size(u8 ttl)
return max_size;
}
+/*
+ * Compute the equivalent of the TTL field by parsing the shadow PT. The
+ * granule size is extracted from the cached VTCR_EL2.TG0 while the level is
+ * retrieved from the first entry carrying the level as a tag.
+ */
+static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
+{
+	u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr;
+	kvm_pte_t pte;
+	u8 ttl, level;
+
+	switch (vtcr & VTCR_EL2_TG0_MASK) {
+	case VTCR_EL2_TG0_4K:
+		ttl = (1 << 2);
+		break;
+	case VTCR_EL2_TG0_16K:
+		ttl = (2 << 2);
+		break;
+	case VTCR_EL2_TG0_64K:
+		ttl = (3 << 2);
+		break;
+	default:
+		BUG();
+	}
+
+	tmp = addr;
+
+again:
+	/* Iteratively compute the block sizes for a particular granule size */
+	switch (vtcr & VTCR_EL2_TG0_MASK) {
+	case VTCR_EL2_TG0_4K:
+		if (sz < SZ_4K)		sz = SZ_4K;
+		else if (sz < SZ_2M)	sz = SZ_2M;
+		else if (sz < SZ_1G)	sz = SZ_1G;
+		else			sz = 0;
+		break;
+	case VTCR_EL2_TG0_16K:
+		if (sz < SZ_16K)	sz = SZ_16K;
+		else if (sz < SZ_32M)	sz = SZ_32M;
+		else			sz = 0;
+		break;
+	case VTCR_EL2_TG0_64K:
+		if (sz < SZ_64K)	sz = SZ_64K;
+		else if (sz < SZ_512M)	sz = SZ_512M;
+		else			sz = 0;
+		break;
+	default:
+		BUG();
+	}
+
+	if (sz == 0)
+		return 0;
+
+	tmp &= ~(sz - 1);
+	if (kvm_pgtable_get_leaf(mmu->pgt, tmp, &pte, NULL))
+		goto again;
+	if (!(pte & PTE_VALID))
+		goto again;
+	level = FIELD_GET(KVM_NV_GUEST_MAP_SZ, pte);
+	if (!level)
+		goto again;
+
+	ttl |= level;
+
+	/*
+	 * We have now found some level information in the shadow S2. Check
+	 * that the resulting range actually includes the original IPA.
+	 */
+	sz = ttl_to_size(ttl);
+	if (addr < (tmp + sz))
+		return ttl;
+
+	return 0;
+}
+
unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val)
{
unsigned long max_size;
@@ -414,6 +490,11 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val)
ttl = FIELD_GET(GENMASK_ULL(47, 44), val);
+	if (!(cpus_have_final_cap(ARM64_HAS_ARMv8_4_TTL) && ttl)) {
+		u64 addr = (val & GENMASK_ULL(35, 0)) << 12;
+		ttl = get_guest_mapping_ttl(mmu, addr);
+	}
+
max_size = ttl_to_size(ttl);
if (!max_size) {
In order to be able to make S2 TLB invalidations more performant on NV,
let's use a scheme derived from the ARMv8.4 TTL extension.

If bits [56:55] in the descriptor are non-zero, they indicate a level
which can be used as an invalidation range.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_nested.h |  2 +
 arch/arm64/kvm/nested.c             | 81 +++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)
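
Reviewer note (not part of the commit message): the lookup above assumes that
the fault-handling path tags each shadow S2 descriptor with the level of the
corresponding guest S2 mapping. Below is a minimal sketch of what that encode
step could look like; the helper name is hypothetical, and only
KVM_NV_GUEST_MAP_SZ and the bits [56:55] encoding come from this patch.

/*
 * Illustrative sketch only: record the guest's S2 mapping level in the
 * software bits ([56:55], i.e. KVM_NV_GUEST_MAP_SZ) of a shadow
 * descriptor, so that get_guest_mapping_ttl() can recover it later.
 * Relies on FIELD_PREP() from <linux/bitfield.h>, as used in nested.c.
 */
static kvm_pte_t tag_shadow_s2_level(kvm_pte_t desc, u8 level)
{
	/* Level 0 is kept to mean "no level information available" */
	if (WARN_ON(level > 3))
		return desc;

	desc &= ~KVM_NV_GUEST_MAP_SZ;
	desc |= FIELD_PREP(KVM_NV_GUEST_MAP_SZ, level);
	return desc;
}

On the invalidation side, get_guest_mapping_ttl() recovers that level with
FIELD_GET(), ORs it into the TG value derived from the cached VTCR_EL2.TG0,
and ttl_to_size() turns the combined TTL into the range returned by
compute_tlb_inval_range().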