diff mbox series

[v2,08/15] KVM: arm64: Protect stage-2 traversal with RCU

Message ID 20221007232818.459650-9-oliver.upton@linux.dev (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Parallel stage-2 fault handling | expand

Commit Message

Oliver Upton Oct. 7, 2022, 11:28 p.m. UTC
The use of RCU is necessary to safely change the stage-2 page tables in
parallel. Acquire and release the RCU read lock when traversing the page
tables.

Use the _raw() flavor of rcu_dereference when changes to the page tables
are otherwise protected from parallel software walkers (e.g. holding the
write lock).

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/arm64/include/asm/kvm_pgtable.h | 34 ++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/pgtable.c         |  7 +++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

Comments

Sean Christopherson Oct. 19, 2022, 11:29 p.m. UTC | #1
On Fri, Oct 07, 2022, Oliver Upton wrote:
> The use of RCU is necessary to safely change the stage-2 page tables in
> parallel. Acquire and release the RCU read lock when traversing the page
> tables.
> 
> Use the _raw() flavor of rcu_dereference when changes to the page tables
> are otherwise protected from parallel software walkers (e.g. holding the
> write lock).
> 
> Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> ---

...

> @@ -32,6 +39,33 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
>  	return pteref;
>  }
>  
> +static inline void kvm_pgtable_walk_begin(void) {}
> +static inline void kvm_pgtable_walk_end(void) {}
> +
> +#else
> +
> +typedef kvm_pte_t __rcu *kvm_pteref_t;
> +
> +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
> +{
> +	if (shared)
> +		return rcu_dereference(pteref);
> +
> +	return rcu_dereference_raw(pteref);

Rather than use raw, use rcu_dereference_check().  If you can plumb down @kvm or
@mmu_lock, the ideal check would be (apparently there's no lockdep_is_held_write()
wrapper?)

	return READ_ONCE(*rcu_dereference_check(ptep, lockdep_is_held_type(mmu_lock, 0)));

If getting at mmu_lock is too difficult, this can still be

	return READ_ONCE(*rcu_dereference_check(ptep, !shared);

Doubt it matters for code generation, but IMO it's cleaner overall.
Oliver Upton Oct. 20, 2022, 8:34 a.m. UTC | #2
On Wed, Oct 19, 2022 at 11:29:56PM +0000, Sean Christopherson wrote:
> On Fri, Oct 07, 2022, Oliver Upton wrote:
> > The use of RCU is necessary to safely change the stage-2 page tables in
> > parallel. Acquire and release the RCU read lock when traversing the page
> > tables.
> > 
> > Use the _raw() flavor of rcu_dereference when changes to the page tables
> > are otherwise protected from parallel software walkers (e.g. holding the
> > write lock).
> > 
> > Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> > ---
> 
> ...
> 
> > @@ -32,6 +39,33 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
> >  	return pteref;
> >  }
> >  
> > +static inline void kvm_pgtable_walk_begin(void) {}
> > +static inline void kvm_pgtable_walk_end(void) {}
> > +
> > +#else
> > +
> > +typedef kvm_pte_t __rcu *kvm_pteref_t;
> > +
> > +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
> > +{
> > +	if (shared)
> > +		return rcu_dereference(pteref);
> > +
> > +	return rcu_dereference_raw(pteref);
> 
> Rather than use raw, use rcu_dereference_check().  If you can plumb down @kvm or
> @mmu_lock, the ideal check would be (apparently there's no lockdep_is_held_write()
> wrapper?)
> 
> 	return READ_ONCE(*rcu_dereference_check(ptep, lockdep_is_held_type(mmu_lock, 0)));
> 
> If getting at mmu_lock is too difficult, this can still be
> 
> 	return READ_ONCE(*rcu_dereference_check(ptep, !shared);
> 
> Doubt it matters for code generation, but IMO it's cleaner overall.

As the page table walkers can be used outside of the context of a VM
(such as hyp stage-1), I think option #2 is probably a bit easier.

--
Thanks,
Oliver
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index beb89eac155c..60c37e5e77dd 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -25,6 +25,13 @@  static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
 typedef kvm_pte_t *kvm_pteref_t;
 
 static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
@@ -32,6 +39,33 @@  static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
 	return pteref;
 }
 
+static inline void kvm_pgtable_walk_begin(void) {}
+static inline void kvm_pgtable_walk_end(void) {}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
+{
+	if (shared)
+		return rcu_dereference(pteref);
+
+	return rcu_dereference_raw(pteref);
+}
+
+static inline void kvm_pgtable_walk_begin(void)
+{
+	rcu_read_lock();
+}
+
+static inline void kvm_pgtable_walk_end(void)
+{
+	rcu_read_unlock();
+}
+
+#endif
+
 #define KVM_PTE_VALID			BIT(0)
 
 #define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 6b6e1ed7ee2f..c2be15850497 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -281,8 +281,13 @@  int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.end	= PAGE_ALIGN(walk_data.addr + size),
 		.walker	= walker,
 	};
+	int r;
 
-	return _kvm_pgtable_walk(pgt, &walk_data);
+	kvm_pgtable_walk_begin();
+	r = _kvm_pgtable_walk(pgt, &walk_data);
+	kvm_pgtable_walk_end();
+
+	return r;
 }
 
 struct leaf_walk_data {