diff mbox series

[v2,4/6] arm64/kvm: Avoid invalid physical addresses to signal owner updates

Message ID 20241205150229.3510177-12-ardb+git@google.com (mailing list archive)
State New
Headers show
Series arm64: Clean up and simplify PA space size handling | expand

Commit Message

Ard Biesheuvel Dec. 5, 2024, 3:02 p.m. UTC
From: Ard Biesheuvel <ardb@kernel.org>

The pKVM stage2 mapping code relies on an invalid physical address to
signal to the internal API that only the owner_id fields of descriptors
should be updated, and these are stored in the high bits of invalid
descriptors covering memory that has been donated to protected guests,
and is therefore unmapped from the host stage-2 page tables.

Given that these invalid PAs are never stored into the descriptors, it
is better to rely on an explicit flag, to clarify the API and to avoid
confusion regarding whether or not the output address of a descriptor
can ever be invalid to begin with (which is not the case with LPA2).

That removes a dependency on the logic that reasons about the maximum PA
range, which differs on LPA2 capable CPUs based on whether LPA2 is
enabled or not, and will be further clarified in subsequent patches.

Cc: Quentin Perret <qperret@google.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/kvm/hyp/pgtable.c | 33 ++++++--------------
 1 file changed, 10 insertions(+), 23 deletions(-)

Comments

Will Deacon Dec. 11, 2024, 10:14 p.m. UTC | #1
Hi Ard,

On Thu, Dec 05, 2024 at 04:02:34PM +0100, Ard Biesheuvel wrote:
> From: Ard Biesheuvel <ardb@kernel.org>
> 
> The pKVM stage2 mapping code relies on an invalid physical address to
> signal to the internal API that only the owner_id fields of descriptors
> should be updated, and these are stored in the high bits of invalid
> descriptors covering memory that has been donated to protected guests,
> and is therefore unmapped from the host stage-2 page tables.
> 
> Given that these invalid PAs are never stored into the descriptors, it
> is better to rely on an explicit flag, to clarify the API and to avoid
> confusion regarding whether or not the output address of a descriptor
> can ever be invalid to begin with (which is not the case with LPA2).
> 
> That removes a dependency on the logic that reasons about the maximum PA
> range, which differs on LPA2 capable CPUs based on whether LPA2 is
> enabled or not, and will be further clarified in subsequent patches.
> 
> Cc: Quentin Perret <qperret@google.com>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/kvm/hyp/pgtable.c | 33 ++++++--------------
>  1 file changed, 10 insertions(+), 23 deletions(-)

Sorry that I didn't reply again on v1, but I have an annoying request
that would make this a little easier for me to follow (since I'm tainted
with the pKVM stack in Android that we're gradually landing upstream):

> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 40bd55966540..0569e1d97c38 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -35,14 +35,6 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
>  	return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
>  }
>  
> -static bool kvm_phys_is_valid(u64 phys)
> -{
> -	u64 parange_max = kvm_get_parange_max();
> -	u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
> -
> -	return phys < BIT(shift);
> -}
> -
>  static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
>  {
>  	u64 granule = kvm_granule_size(ctx->level);
> @@ -53,7 +45,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
>  	if (granule > (ctx->end - ctx->addr))
>  		return false;
>  
> -	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
> +	if (!IS_ALIGNED(phys, granule))
>  		return false;
>  
>  	return IS_ALIGNED(ctx->addr, granule);
> @@ -587,6 +579,9 @@ struct stage2_map_data {
>  
>  	/* Force mappings to page granularity */
>  	bool				force_pte;
> +
> +	/* Walk should update owner_id only */
> +	bool				owner_update;

Can you rename this to "annotation", please? We'll eventually land other
types of invalid pte than ownership (e.g. MMIO_GUARD) and, given that
the ownership walker is caught by the 'force_pte' flag, it's a little
more generic.

Again, apologies I didn't ask for this earlier.

Will
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 40bd55966540..0569e1d97c38 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -35,14 +35,6 @@  static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
 	return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
 }
 
-static bool kvm_phys_is_valid(u64 phys)
-{
-	u64 parange_max = kvm_get_parange_max();
-	u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
-
-	return phys < BIT(shift);
-}
-
 static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
 {
 	u64 granule = kvm_granule_size(ctx->level);
@@ -53,7 +45,7 @@  static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
 	if (granule > (ctx->end - ctx->addr))
 		return false;
 
-	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
+	if (!IS_ALIGNED(phys, granule))
 		return false;
 
 	return IS_ALIGNED(ctx->addr, granule);
@@ -587,6 +579,9 @@  struct stage2_map_data {
 
 	/* Force mappings to page granularity */
 	bool				force_pte;
+
+	/* Walk should update owner_id only */
+	bool				owner_update;
 };
 
 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
@@ -885,18 +880,7 @@  static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
 {
 	u64 phys = data->phys;
 
-	/*
-	 * Stage-2 walks to update ownership data are communicated to the map
-	 * walker using an invalid PA. Avoid offsetting an already invalid PA,
-	 * which could overflow and make the address valid again.
-	 */
-	if (!kvm_phys_is_valid(phys))
-		return phys;
-
-	/*
-	 * Otherwise, work out the correct PA based on how far the walk has
-	 * gotten.
-	 */
+	/* Work out the correct PA based on how far the walk has gotten */
 	return phys + (ctx->addr - ctx->start);
 }
 
@@ -908,6 +892,9 @@  static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
 	if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
 		return false;
 
+	if (data->owner_update && ctx->level == KVM_PGTABLE_LAST_LEVEL)
+		return true;
+
 	return kvm_block_mapping_supported(ctx, phys);
 }
 
@@ -923,7 +910,7 @@  static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return -E2BIG;
 
-	if (kvm_phys_is_valid(phys))
+	if (!data->owner_update)
 		new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
 	else
 		new = kvm_init_invalid_leaf_owner(data->owner_id);
@@ -1085,11 +1072,11 @@  int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 {
 	int ret;
 	struct stage2_map_data map_data = {
-		.phys		= KVM_PHYS_INVALID,
 		.mmu		= pgt->mmu,
 		.memcache	= mc,
 		.owner_id	= owner_id,
 		.force_pte	= true,
+		.owner_update	= true,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,