diff mbox series

[09/14] KVM: arm64: Mark host bss and rodata section as shared

Message ID 20210719104735.3681732-10-qperret@google.com (mailing list archive)
State New, archived
Headers show
Series Track shared pages at EL2 in protected mode | expand

Commit Message

Quentin Perret July 19, 2021, 10:47 a.m. UTC
As the hypervisor maps the host's .bss and .rodata sections in its
stage-1, make sure to tag them as shared in hyp and host page-tables.

But since the hypervisor relies on the presence of these mappings, we
cannot leave the host in complete control of the memory regions -- it
must not unshare or donate them to another entity for example. To
prevent this, let's transfer the ownership of those ranges to the
hypervisor itself, and share the pages back with the host.

Signed-off-by: Quentin Perret <qperret@google.com>
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  1 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         |  7 ++-
 arch/arm64/kvm/hyp/nvhe/setup.c               | 52 ++++++++++++++++---
 3 files changed, 51 insertions(+), 9 deletions(-)

Comments

Marc Zyngier July 19, 2021, 3:01 p.m. UTC | #1
On Mon, 19 Jul 2021 11:47:30 +0100,
Quentin Perret <qperret@google.com> wrote:
> 
> As the hypervisor maps the host's .bss and .rodata sections in its
> stage-1, make sure to tag them as shared in hyp and host page-tables.
> 
> But since the hypervisor relies on the presence of these mappings, we
> cannot leave the host in complete control of the memory regions -- it
> must not unshare or donate them to another entity for example. To
> prevent this, let's transfer the ownership of those ranges to the
> hypervisor itself, and share the pages back with the host.
> 
> Signed-off-by: Quentin Perret <qperret@google.com>
> ---
>  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  1 +
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c         |  7 ++-
>  arch/arm64/kvm/hyp/nvhe/setup.c               | 52 ++++++++++++++++---
>  3 files changed, 51 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> index 9c227d87c36d..b39047463075 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
> @@ -23,6 +23,7 @@ extern struct host_kvm host_kvm;
>  int __pkvm_prot_finalize(void);
>  int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
>  
> +int host_stage2_idmap_locked(u64 start, u64 end, enum kvm_pgtable_prot prot);
>  int kvm_host_prepare_stage2(void *pgt_pool_base);
>  void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index cdace80d3e28..6f28edf58407 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -235,6 +235,11 @@ static bool host_stage2_want_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pro
>  		return prot != KVM_PGTABLE_PROT_RW;
>  }
>  
> +int host_stage2_idmap_locked(u64 start, u64 end, enum kvm_pgtable_prot prot)
> +{
> +	return host_stage2_try(__host_stage2_idmap, start, end, prot);
> +}
> +
>  static int host_stage2_idmap(u64 addr)
>  {
>  	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RW;
> @@ -250,7 +255,7 @@ static int host_stage2_idmap(u64 addr)
>  	if (ret)
>  		goto unlock;
>  
> -	ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot);
> +	ret = host_stage2_idmap_locked(range.start, range.end, prot);
>  unlock:
>  	hyp_spin_unlock(&host_kvm.lock);
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index 0b574d106519..74dce83a6fad 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -83,10 +83,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
>  	if (ret)
>  		return ret;
>  
> -	ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
> -	if (ret)
> -		return ret;
> -
>  	ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
>  	if (ret)
>  		return ret;
> @@ -95,10 +91,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
>  	if (ret)
>  		return ret;
>  
> -	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
> -	if (ret)
> -		return ret;
> -
>  	ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
>  	if (ret)
>  		return ret;
> @@ -117,6 +109,25 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
>  			return ret;
>  	}
>  
> +	/*
> +	 * Map the host's .bss and .rodata sections RO in the hypervisor, but
> +	 * transfer the ownership from the host to the hypervisor itself to
> +	 * make sure it can't be donated or shared with another entity.
> +	 *
> +	 * The ownership transition requires matching changes in the host
> +	 * stage-2. This will be done later (see finalize_mappings()) once the
> +	 * hyp_vmemmap is addressable.
> +	 */
> +	ret = pkvm_create_mappings(__start_rodata, __end_rodata,
> +				   PAGE_HYP_RO | KVM_PGTABLE_STATE_SHARED);
> +	if (ret)
> +		return ret;
> +
> +	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop,
> +				   PAGE_HYP_RO | KVM_PGTABLE_STATE_SHARED);
> +	if (ret)
> +		return ret;
> +
>  	return 0;
>  }
>  
> @@ -148,6 +159,27 @@ static void hpool_put_page(void *addr)
>  	hyp_put_page(&hpool, addr);
>  }
>  
> +static int finalize_mappings(void)
> +{
> +	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
> +	int ret;
> +
> +	/*
> +	 * The host's .bss and .rodata sections are now conceptually owned by
> +	 * the hypervisor, so mark them as 'borrowed' in the host stage-2. We
> +	 * can safely use host_stage2_idmap_locked() at this point since the
> +	 * host stage-2 has not been enabled yet.
> +	 */
> +	prot |= KVM_PGTABLE_STATE_SHARED | KVM_PGTABLE_STATE_BORROWED;
> +	ret = host_stage2_idmap_locked(__hyp_pa(__start_rodata),
> +				       __hyp_pa(__end_rodata), prot);

Do we really want to map the rodata section as RWX?

> +	if (ret)
> +		return ret;
> +
> +	return host_stage2_idmap_locked(__hyp_pa(__hyp_bss_end),
> +					__hyp_pa(__bss_stop), prot);

If the 'locked' state implies SHARED+BORROWED, maybe consider moving
the ORRing of the prot into host_stage2_idmap_locked()?

> +}
> +
>  void __noreturn __pkvm_init_finalise(void)
>  {
>  	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
> @@ -167,6 +199,10 @@ void __noreturn __pkvm_init_finalise(void)
>  	if (ret)
>  		goto out;
>  
> +	ret = finalize_mappings();
> +	if (ret)
> +		goto out;
> +
>  	pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
>  		.zalloc_page = hyp_zalloc_hyp_page,
>  		.phys_to_virt = hyp_phys_to_virt,

Thanks,

	M.
Quentin Perret July 19, 2021, 3:56 p.m. UTC | #2
On Monday 19 Jul 2021 at 16:01:40 (+0100), Marc Zyngier wrote:
> On Mon, 19 Jul 2021 11:47:30 +0100,
> Quentin Perret <qperret@google.com> wrote:
> > +static int finalize_mappings(void)
> > +{
> > +	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
> > +	int ret;
> > +
> > +	/*
> > +	 * The host's .bss and .rodata sections are now conceptually owned by
> > +	 * the hypervisor, so mark them as 'borrowed' in the host stage-2. We
> > +	 * can safely use host_stage2_idmap_locked() at this point since the
> > +	 * host stage-2 has not been enabled yet.
> > +	 */
> > +	prot |= KVM_PGTABLE_STATE_SHARED | KVM_PGTABLE_STATE_BORROWED;
> > +	ret = host_stage2_idmap_locked(__hyp_pa(__start_rodata),
> > +				       __hyp_pa(__end_rodata), prot);
> 
> Do we really want to map the rodata section as RWX?

I know, feels odd, but for now I think so. The host is obviously
welcome to restrict things in its stage-1, but for stage-2, this is
just 'memory' so far, the host is allowed to patch it if it wants to.

Eventually, yes, I think we should make it RO in the host stage-2, but
maybe that's for another series?

> > +	if (ret)
> > +		return ret;
> > +
> > +	return host_stage2_idmap_locked(__hyp_pa(__hyp_bss_end),
> > +					__hyp_pa(__bss_stop), prot);
> 
> If the 'locked' state implies SHARED+BORROWED, maybe consider moving
> the ORRing of the prot into host_stage2_idmap_locked()?

Ah no, sorry for the confusion, but 'locked' means that we already hold
the pgtable lock. That is not actually true here, but this is a special
case as only the current CPU can be messing with it at this point in
time so taking the lock would just be wasted cycles.

> > +}
> > +
> >  void __noreturn __pkvm_init_finalise(void)
> >  {
> >  	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
> > @@ -167,6 +199,10 @@ void __noreturn __pkvm_init_finalise(void)
> >  	if (ret)
> >  		goto out;
> >  
> > +	ret = finalize_mappings();
> > +	if (ret)
> > +		goto out;
> > +
> >  	pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
> >  		.zalloc_page = hyp_zalloc_hyp_page,
> >  		.phys_to_virt = hyp_phys_to_virt,

Thanks,
Quentin
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 9c227d87c36d..b39047463075 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -23,6 +23,7 @@  extern struct host_kvm host_kvm;
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
+int host_stage2_idmap_locked(u64 start, u64 end, enum kvm_pgtable_prot prot);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index cdace80d3e28..6f28edf58407 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -235,6 +235,11 @@  static bool host_stage2_want_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pro
 		return prot != KVM_PGTABLE_PROT_RW;
 }
 
+int host_stage2_idmap_locked(u64 start, u64 end, enum kvm_pgtable_prot prot)
+{
+	return host_stage2_try(__host_stage2_idmap, start, end, prot);
+}
+
 static int host_stage2_idmap(u64 addr)
 {
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RW;
@@ -250,7 +255,7 @@  static int host_stage2_idmap(u64 addr)
 	if (ret)
 		goto unlock;
 
-	ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot);
+	ret = host_stage2_idmap_locked(range.start, range.end, prot);
 unlock:
 	hyp_spin_unlock(&host_kvm.lock);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0b574d106519..74dce83a6fad 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -83,10 +83,6 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
 	if (ret)
 		return ret;
@@ -95,10 +91,6 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
 	if (ret)
 		return ret;
@@ -117,6 +109,25 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 			return ret;
 	}
 
+	/*
+	 * Map the host's .bss and .rodata sections RO in the hypervisor, but
+	 * transfer the ownership from the host to the hypervisor itself to
+	 * make sure it can't be donated or shared with another entity.
+	 *
+	 * The ownership transition requires matching changes in the host
+	 * stage-2. This will be done later (see finalize_mappings()) once the
+	 * hyp_vmemmap is addressable.
+	 */
+	ret = pkvm_create_mappings(__start_rodata, __end_rodata,
+				   PAGE_HYP_RO | KVM_PGTABLE_STATE_SHARED);
+	if (ret)
+		return ret;
+
+	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop,
+				   PAGE_HYP_RO | KVM_PGTABLE_STATE_SHARED);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -148,6 +159,27 @@  static void hpool_put_page(void *addr)
 	hyp_put_page(&hpool, addr);
 }
 
+static int finalize_mappings(void)
+{
+	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
+	int ret;
+
+	/*
+	 * The host's .bss and .rodata sections are now conceptually owned by
+	 * the hypervisor, so mark them as 'borrowed' in the host stage-2. We
+	 * can safely use host_stage2_idmap_locked() at this point since the
+	 * host stage-2 has not been enabled yet.
+	 */
+	prot |= KVM_PGTABLE_STATE_SHARED | KVM_PGTABLE_STATE_BORROWED;
+	ret = host_stage2_idmap_locked(__hyp_pa(__start_rodata),
+				       __hyp_pa(__end_rodata), prot);
+	if (ret)
+		return ret;
+
+	return host_stage2_idmap_locked(__hyp_pa(__hyp_bss_end),
+					__hyp_pa(__bss_stop), prot);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -167,6 +199,10 @@  void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	ret = finalize_mappings();
+	if (ret)
+		goto out;
+
 	pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
 		.zalloc_page = hyp_zalloc_hyp_page,
 		.phys_to_virt = hyp_phys_to_virt,