diff mbox series

[v2,12/16] KVM: arm64: Mark host bss and rodata section as shared

Message ID 20210726092905.2198501-13-qperret@google.com (mailing list archive)
State New, archived
Headers show
Series Track shared pages at EL2 in protected mode | expand

Commit Message

Quentin Perret July 26, 2021, 9:29 a.m. UTC
As the hypervisor maps the host's .bss and .rodata sections in its
stage-1, make sure to tag them as shared in hyp and host page-tables.

But since the hypervisor relies on the presence of these mappings, we
cannot leave the host in complete control of the memory regions -- it
must not unshare or donate them to another entity for example. To
prevent this, let's transfer the ownership of those ranges to the
hypervisor itself, and share the pages back with the host.

Signed-off-by: Quentin Perret <qperret@google.com>
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 52 ++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 8 deletions(-)

Comments

Quentin Perret July 28, 2021, 12:14 p.m. UTC | #1
On Monday 26 Jul 2021 at 10:29:01 (+0100), Quentin Perret wrote:
> +static int finalize_mappings(void)
> +{
> +	enum kvm_pgtable_prot prot;
> +	int ret;
> +
> +	/*
> +	 * The host's .bss and .rodata sections are now conceptually owned by
> +	 * the hypervisor, so mark them as 'borrowed' in the host stage-2. We
> +	 * can safely use host_stage2_idmap_locked() at this point since the
> +	 * host stage-2 has not been enabled yet.
> +	 */
> +	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_BORROWED);
> +	ret = host_stage2_idmap_locked(__hyp_pa(__start_rodata),
> +				       __hyp_pa(__end_rodata), prot);
> +	if (ret)
> +		return ret;
> +
> +	return host_stage2_idmap_locked(__hyp_pa(__hyp_bss_end),
> +					__hyp_pa(__bss_stop), prot);
> +}
> +
>  void __noreturn __pkvm_init_finalise(void)
>  {
>  	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
> @@ -167,6 +199,10 @@ void __noreturn __pkvm_init_finalise(void)
>  	if (ret)
>  		goto out;
>  
> +	ret = finalize_mappings();
> +	if (ret)
> +		goto out;

While working on v3 of this series it occurred to me that we can
actually do vastly better than this. Specifically, the annotation of
shared pages currently happens in two places (recreate_hyp_mappings()
and finalize_mappings()) with nothing to guarantee they are in sync. At
the same time, the annotation of pages owned by the hypervisor is left
to the host itself using the __pkvm_mark_hyp hypercall. But clearly, by
the point we arrive at finalize_mappings() above, all the information I
need is already stored in the hyp pgtable. That is, it should be fairly
easy to walk the hyp stage-1, and for each valid mapping create a
matching annotation in the host stage-2 to mark the page shared or owned
by the hypervisor.

I'll have a go at implementing this in v3, which would guarantee
consistency across page-tables once the hypervisor is initialized, and
also allow us to get rid of __pkvm_mark_hyp entirely. But if anybody
thinks this is a bad idea in the meantime, please shout!

Thanks,
Quentin
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0b574d106519..285c8aea5065 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -58,6 +58,7 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 {
 	void *start, *end, *virt = hyp_phys_to_virt(phys);
 	unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
+	enum kvm_pgtable_prot prot;
 	int ret, i;
 
 	/* Recreate the hyp page-table using the early page allocator */
@@ -83,10 +84,6 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
 	if (ret)
 		return ret;
@@ -95,10 +92,6 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
 	if (ret)
 		return ret;
@@ -117,6 +110,24 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 			return ret;
 	}
 
+	/*
+	 * Map the host's .bss and .rodata sections RO in the hypervisor, but
+	 * transfer the ownership from the host to the hypervisor itself to
+	 * make sure it can't be donated or shared with another entity.
+	 *
+	 * The ownership transition requires matching changes in the host
+	 * stage-2. This will be done later (see finalize_mappings()) once the
+	 * hyp_vmemmap is addressable.
+	 */
+	prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
+	ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
+	if (ret)
+		return ret;
+
+	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -148,6 +159,27 @@  static void hpool_put_page(void *addr)
 	hyp_put_page(&hpool, addr);
 }
 
+static int finalize_mappings(void)
+{
+	enum kvm_pgtable_prot prot;
+	int ret;
+
+	/*
+	 * The host's .bss and .rodata sections are now conceptually owned by
+	 * the hypervisor, so mark them as 'borrowed' in the host stage-2. We
+	 * can safely use host_stage2_idmap_locked() at this point since the
+	 * host stage-2 has not been enabled yet.
+	 */
+	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_BORROWED);
+	ret = host_stage2_idmap_locked(__hyp_pa(__start_rodata),
+				       __hyp_pa(__end_rodata), prot);
+	if (ret)
+		return ret;
+
+	return host_stage2_idmap_locked(__hyp_pa(__hyp_bss_end),
+					__hyp_pa(__bss_stop), prot);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -167,6 +199,10 @@  void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	ret = finalize_mappings();
+	if (ret)
+		goto out;
+
 	pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
 		.zalloc_page = hyp_zalloc_hyp_page,
 		.phys_to_virt = hyp_phys_to_virt,