diff mbox series

[RFC,07/17] KVM: arm64: Enlighten perm relax path about parallel walks

Message ID 20220415215901.1737897-8-oupton@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Parallelize stage 2 fault handling | expand

Commit Message

Oliver Upton April 15, 2022, 9:58 p.m. UTC
To date the permission relax path of the stage-2 fault handler hasn't
had to worry about the paging structures changing under its nose, as map
operations acquire the write lock. That's about to change, which means a
permission relaxation walker could traverse in parallel with a map
operation.

If at any point during traversal the permission relax walker finds a
locked pte, bail immediately. Either the instruction will succeed or the
vCPU will fault once more and (hopefully) walk the tables successfully.

Signed-off-by: Oliver Upton <oupton@google.com>
---
 arch/arm64/kvm/hyp/pgtable.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 059ebb921125..ff6f14755d0c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1168,6 +1168,11 @@  static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	struct stage2_attr_data *data = arg;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
+	if (stage2_pte_is_locked(pte)) {
+		WARN_ON(!shared);
+		return -EAGAIN;
+	}
+
 	if (!kvm_pte_valid(pte))
 		return 0;
 
@@ -1190,7 +1195,9 @@  static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
 			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
 						  kvm_granule_size(level));
-		WRITE_ONCE(*ptep, pte);
+
+		if (!kvm_try_set_pte(ptep, data->pte, pte, shared))
+			return -EAGAIN;
 	}
 
 	return 0;
@@ -1199,7 +1206,7 @@  static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 				    u64 size, kvm_pte_t attr_set,
 				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
-				    u32 *level)
+				    u32 *level, bool shared)
 {
 	int ret;
 	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
@@ -1214,7 +1221,7 @@  static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 		.flags		= KVM_PGTABLE_WALK_LEAF,
 	};
 
-	ret = kvm_pgtable_walk(pgt, addr, size, &walker, false);
+	ret = kvm_pgtable_walk(pgt, addr, size, &walker, shared);
 	if (ret)
 		return ret;
 
@@ -1230,14 +1237,14 @@  int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
 	return stage2_update_leaf_attrs(pgt, addr, size, 0,
 					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
-					NULL, NULL);
+					NULL, NULL, false);
 }
 
 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
 	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
-				 &pte, NULL);
+				 &pte, NULL, false);
 	dsb(ishst);
 	return pte;
 }
@@ -1246,7 +1253,7 @@  kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
 	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
-				 &pte, NULL);
+				 &pte, NULL, false);
 	/*
 	 * "But where's the TLBI?!", you scream.
 	 * "Over in the core code", I sigh.
@@ -1259,7 +1266,7 @@  kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
-	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
+	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, false);
 	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
 }
 
@@ -1282,7 +1289,7 @@  int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
 	if (prot & KVM_PGTABLE_PROT_X)
 		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
 
-	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
+	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, true);
 	if (!ret)
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
 	return ret;