diff mbox series

[v5,06/25] KVM: arm64: Implement do_donate() helper for donating memory

Message ID 20221020133827.5541-7-will@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Introduce pKVM hyp VM and vCPU state at EL2 | expand

Commit Message

Will Deacon Oct. 20, 2022, 1:38 p.m. UTC
Transferring ownership information of a memory region from one component
to another can be achieved using a "donate" operation, which results
in the previous owner losing access to the underlying pages entirely
and the new owner having exclusive access to the pages.

Implement a do_donate() helper, along the same lines as do_{un,}share,
and provide this functionality for the host-{to,from}-hyp cases as this
will later be used to donate/reclaim memory pages to store VM metadata
at EL2.

In a similar manner to the sharing transitions, permission checks are
performed by the hypervisor to ensure that the component initiating the
transition really is the owner of the page and also that the completer
does not currently have a page mapped at the target address.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Co-developed-by: Quentin Perret <qperret@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |   2 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 239 ++++++++++++++++++
 2 files changed, 241 insertions(+)

Comments

Oliver Upton Oct. 28, 2022, 7:52 a.m. UTC | #1
On Thu, Oct 20, 2022 at 02:38:08PM +0100, Will Deacon wrote:
> Transferring ownership information of a memory region from one component
> to another can be achieved using a "donate" operation, which results
> in the previous owner losing access to the underlying pages entirely
> and the new owner having exclusive access to the pages.
> 
> Implement a do_donate() helper, along the same lines as do_{un,}share,
> and provide this functionality for the host-{to,from}-hyp cases as this
> will later be used to donate/reclaim memory pages to store VM metadata
> at EL2.
> 
> In a similar manner to the sharing transitions, permission checks are
> performed by the hypervisor to ensure that the component initiating the
> transition really is the owner of the page and also that the completer
> does not currently have a page mapped at the target address.

Is the intention of this infra to support memory donations between more
than just the host + hyp components? This patch goes out of its way to
build some generic helpers for things, but it isn't immediately obvious
why that is necessary for just two supported state transitions.

[...]

> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> index f5705a1e972f..c87b19b2d468 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> @@ -60,6 +60,8 @@ enum pkvm_component_id {
>  int __pkvm_prot_finalize(void);
>  int __pkvm_host_share_hyp(u64 pfn);
>  int __pkvm_host_unshare_hyp(u64 pfn);
> +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
> +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
>  
>  bool addr_is_memory(phys_addr_t phys);
>  int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index ff86f5bd230f..c30402737548 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -391,6 +391,9 @@ struct pkvm_mem_transition {
>  				/* Address in the completer's address space */
>  				u64	completer_addr;
>  			} host;
> +			struct {
> +				u64	completer_addr;
> +			} hyp;

I don't believe the union is providing a ton of value here. In fact, the
whole layout of the pkvm_mem_transition structure confuses me a little.
Why not move 'completer_addr' to pkvm_mem_transition::completer::addr?

You'd then have two identical structs for describing the source and
target addresses for a chunk of memory. IDK if this would be needed
later on, but such a struct could be worthy of its own type as it fully
describes the address and its owning address space.

Spitballing:

	struct pkvm_mem_transition {
		u64	nr_pages;

		struct {
			enum pkvm_component_id	id;
			u64			addr;
		} source;

		struct {
			enum pkvm_component_id	id;
			u64			addr;
		} target;
	};

>  		};
>  	} initiator;
>  
> @@ -404,6 +407,10 @@ struct pkvm_mem_share {
>  	const enum kvm_pgtable_prot		completer_prot;
>  };
>  
> +struct pkvm_mem_donation {
> +	const struct pkvm_mem_transition	tx;
> +};
> +

What is the purpose of introducing another struct here? AFAICT none of
the subsequent patches add fields to this.

>  struct check_walk_data {
>  	enum pkvm_page_state	desired;
>  	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
> @@ -503,6 +510,46 @@ static int host_initiate_unshare(u64 *completer_addr,
>  	return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
>  }
>  
> +static int host_initiate_donation(u64 *completer_addr,
> +				  const struct pkvm_mem_transition *tx)

<bikeshed>

The {host,hyp}_initiate_donation() function names are a tiny bit
confusing. IMO, referring to this phase of the donation as 'disowning'
might make it more obvious what is actually changing in the page tables
at this moment.

</bikeshed>

> +{
> +	u8 owner_id = tx->completer.id;
> +	u64 size = tx->nr_pages * PAGE_SIZE;
> +
> +	*completer_addr = tx->initiator.host.completer_addr;

This kind of out pointer is extremely funky... Rejigging
pkvm_mem_transition would allow __do_donate() to work out the
'completer_addr' directly.

> +	return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
> +}
> +
> +static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
> +{
> +	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
> +		 tx->initiator.id != PKVM_ID_HYP);
> +}
> +
> +static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
> +				 enum pkvm_page_state state)
> +{
> +	u64 size = tx->nr_pages * PAGE_SIZE;
> +
> +	if (__host_ack_skip_pgtable_check(tx))
> +		return 0;
> +
> +	return __host_check_page_state_range(addr, size, state);
> +}
> +
> +static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
> +{
> +	return __host_ack_transition(addr, tx, PKVM_NOPAGE);
> +}
> +
> +static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
> +{
> +	u64 size = tx->nr_pages * PAGE_SIZE;
> +	u8 host_id = tx->completer.id;
> +
> +	return host_stage2_set_owner_locked(addr, size, host_id);
> +}
> +
>  static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
>  {
>  	if (!kvm_pte_valid(pte))
> @@ -523,6 +570,27 @@ static int __hyp_check_page_state_range(u64 addr, u64 size,
>  	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
>  }
>  
> +static int hyp_request_donation(u64 *completer_addr,
> +				const struct pkvm_mem_transition *tx)

I'm not too big of a fan of the request/ack verbiage here. IMO, it is
suggestive of some form of message passing between the two components.
But, AFAICT:

 - 'request' checks that the component owns the pages it is trying to
   donate.

 - 'ack' checks that the component doesn't have anything mapped at the
   target address

Why not call it {host,hyp}_check_range_owned() and
{host,hyp}_check_range_unmapped()? That way it is immediately obvious
what conditions are being tested in check_donation().

Sorry, I see that there is some groundwork for this already upstream,
but I still find it confusing.

[...]

> +static int check_donation(struct pkvm_mem_donation *donation)
> +{
> +	const struct pkvm_mem_transition *tx = &donation->tx;
> +	u64 completer_addr;
> +	int ret;
> +
> +	switch (tx->initiator.id) {
> +	case PKVM_ID_HOST:
> +		ret = host_request_owned_transition(&completer_addr, tx);
> +		break;
> +	case PKVM_ID_HYP:
> +		ret = hyp_request_donation(&completer_addr, tx);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret)
> +		return ret;
> +
> +	switch (tx->completer.id){
				^^
nit: whitespace

> +	case PKVM_ID_HOST:
> +		ret = host_ack_donation(completer_addr, tx);
> +		break;
> +	case PKVM_ID_HYP:
> +		ret = hyp_ack_donation(completer_addr, tx);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
> +static int __do_donate(struct pkvm_mem_donation *donation)
> +{
> +	const struct pkvm_mem_transition *tx = &donation->tx;
> +	u64 completer_addr;
> +	int ret;
> +
> +	switch (tx->initiator.id) {
> +	case PKVM_ID_HOST:
> +		ret = host_initiate_donation(&completer_addr, tx);
> +		break;
> +	case PKVM_ID_HYP:
> +		ret = hyp_initiate_donation(&completer_addr, tx);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret)
> +		return ret;
> +
> +	switch (tx->completer.id){
				^^
nit: whitespace

--
Thanks,
Oliver
Quentin Perret Oct. 28, 2022, 10:01 a.m. UTC | #2
On Friday 28 Oct 2022 at 07:52:38 (+0000), Oliver Upton wrote:
> Is the intention of this infra to support memory donations between more
> than just the host + hyp components? This patch goes out of its way to
> build some generic helpers for things, but it isn't immediately obvious
> why that is necessary for just two supported state transitions.

Yup, the plan is to use all this infrastructure for host-guest and
host-trustzone transitions, but that's indeed not very obvious from the
patch. We should probably mention that in the commit message.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index f5705a1e972f..c87b19b2d468 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -60,6 +60,8 @@  enum pkvm_component_id {
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
 int __pkvm_host_unshare_hyp(u64 pfn);
+int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
+int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index ff86f5bd230f..c30402737548 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -391,6 +391,9 @@  struct pkvm_mem_transition {
 				/* Address in the completer's address space */
 				u64	completer_addr;
 			} host;
+			struct {
+				u64	completer_addr;
+			} hyp;
 		};
 	} initiator;
 
@@ -404,6 +407,10 @@  struct pkvm_mem_share {
 	const enum kvm_pgtable_prot		completer_prot;
 };
 
+struct pkvm_mem_donation {
+	const struct pkvm_mem_transition	tx;
+};
+
 struct check_walk_data {
 	enum pkvm_page_state	desired;
 	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
@@ -503,6 +510,46 @@  static int host_initiate_unshare(u64 *completer_addr,
 	return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
 }
 
+static int host_initiate_donation(u64 *completer_addr,
+				  const struct pkvm_mem_transition *tx)
+{
+	u8 owner_id = tx->completer.id;
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+	return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
+}
+
+static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+		 tx->initiator.id != PKVM_ID_HYP);
+}
+
+static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
+				 enum pkvm_page_state state)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	if (__host_ack_skip_pgtable_check(tx))
+		return 0;
+
+	return __host_check_page_state_range(addr, size, state);
+}
+
+static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	return __host_ack_transition(addr, tx, PKVM_NOPAGE);
+}
+
+static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u8 host_id = tx->completer.id;
+
+	return host_stage2_set_owner_locked(addr, size, host_id);
+}
+
 static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
 {
 	if (!kvm_pte_valid(pte))
@@ -523,6 +570,27 @@  static int __hyp_check_page_state_range(u64 addr, u64 size,
 	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
 }
 
+static int hyp_request_donation(u64 *completer_addr,
+				const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u64 addr = tx->initiator.addr;
+
+	*completer_addr = tx->initiator.hyp.completer_addr;
+	return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int hyp_initiate_donation(u64 *completer_addr,
+				 const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	int ret;
+
+	*completer_addr = tx->initiator.hyp.completer_addr;
+	ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
+	return (ret != size) ? -EFAULT : 0;
+}
+
 static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
 {
 	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
@@ -554,6 +622,16 @@  static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
 					    PKVM_PAGE_SHARED_BORROWED);
 }
 
+static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	if (__hyp_ack_skip_pgtable_check(tx))
+		return 0;
+
+	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
 static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
 			      enum kvm_pgtable_prot perms)
 {
@@ -572,6 +650,15 @@  static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
 	return (ret != size) ? -EFAULT : 0;
 }
 
+static int hyp_complete_donation(u64 addr,
+				 const struct pkvm_mem_transition *tx)
+{
+	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+	enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
+
+	return pkvm_create_mappings_locked(start, end, prot);
+}
+
 static int check_share(struct pkvm_mem_share *share)
 {
 	const struct pkvm_mem_transition *tx = &share->tx;
@@ -724,6 +811,94 @@  static int do_unshare(struct pkvm_mem_share *share)
 	return WARN_ON(__do_unshare(share));
 }
 
+static int check_donation(struct pkvm_mem_donation *donation)
+{
+	const struct pkvm_mem_transition *tx = &donation->tx;
+	u64 completer_addr;
+	int ret;
+
+	switch (tx->initiator.id) {
+	case PKVM_ID_HOST:
+		ret = host_request_owned_transition(&completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_request_donation(&completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	switch (tx->completer.id){
+	case PKVM_ID_HOST:
+		ret = host_ack_donation(completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_ack_donation(completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int __do_donate(struct pkvm_mem_donation *donation)
+{
+	const struct pkvm_mem_transition *tx = &donation->tx;
+	u64 completer_addr;
+	int ret;
+
+	switch (tx->initiator.id) {
+	case PKVM_ID_HOST:
+		ret = host_initiate_donation(&completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_initiate_donation(&completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	switch (tx->completer.id){
+	case PKVM_ID_HOST:
+		ret = host_complete_donation(completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_complete_donation(completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * do_donate():
+ *
+ * The page owner transfers ownership to another component, losing access
+ * as a consequence.
+ *
+ * Initiator: OWNED	=> NOPAGE
+ * Completer: NOPAGE	=> OWNED
+ */
+static int do_donate(struct pkvm_mem_donation *donation)
+{
+	int ret;
+
+	ret = check_donation(donation);
+	if (ret)
+		return ret;
+
+	return WARN_ON(__do_donate(donation));
+}
+
 int __pkvm_host_share_hyp(u64 pfn)
 {
 	int ret;
@@ -789,3 +964,67 @@  int __pkvm_host_unshare_hyp(u64 pfn)
 
 	return ret;
 }
+
+int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
+{
+	int ret;
+	u64 host_addr = hyp_pfn_to_phys(pfn);
+	u64 hyp_addr = (u64)__hyp_va(host_addr);
+	struct pkvm_mem_donation donation = {
+		.tx	= {
+			.nr_pages	= nr_pages,
+			.initiator	= {
+				.id	= PKVM_ID_HOST,
+				.addr	= host_addr,
+				.host	= {
+					.completer_addr = hyp_addr,
+				},
+			},
+			.completer	= {
+				.id	= PKVM_ID_HYP,
+			},
+		},
+	};
+
+	host_lock_component();
+	hyp_lock_component();
+
+	ret = do_donate(&donation);
+
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return ret;
+}
+
+int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
+{
+	int ret;
+	u64 host_addr = hyp_pfn_to_phys(pfn);
+	u64 hyp_addr = (u64)__hyp_va(host_addr);
+	struct pkvm_mem_donation donation = {
+		.tx	= {
+			.nr_pages	= nr_pages,
+			.initiator	= {
+				.id	= PKVM_ID_HYP,
+				.addr	= hyp_addr,
+				.hyp	= {
+					.completer_addr = host_addr,
+				},
+			},
+			.completer	= {
+				.id	= PKVM_ID_HOST,
+			},
+		},
+	};
+
+	host_lock_component();
+	hyp_lock_component();
+
+	ret = do_donate(&donation);
+
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return ret;
+}