diff mbox series

[RFC,v2,10/26] KVM: arm64: Introduce an early Hyp page allocator

Message ID 20210108121524.656872-11-qperret@google.com (mailing list archive)
State New, archived
Headers show
Series KVM/arm64: A stage 2 for the host | expand

Commit Message

Quentin Perret Jan. 8, 2021, 12:15 p.m. UTC
With nVHE, the host currently creates all s1 hypervisor mappings at EL1
during boot, installs them at EL2, and extends them as required (e.g.
when creating a new VM). But in a world where the host is no longer
trusted, it cannot have full control over the code mapped in the
hypervisor.

In preparation for enabling the hypervisor to create its own s1 mappings
during boot, introduce an early page allocator, with minimal
functionality. This allocator is designed to be used only during early
bootstrap of the hyp code when memory protection is enabled, which will
then switch to using a full-fledged page allocator after init.

Signed-off-by: Quentin Perret <qperret@google.com>
---
 arch/arm64/kvm/hyp/include/nvhe/early_alloc.h | 14 +++++
 arch/arm64/kvm/hyp/include/nvhe/memory.h      | 24 ++++++++
 arch/arm64/kvm/hyp/nvhe/Makefile              |  2 +-
 arch/arm64/kvm/hyp/nvhe/early_alloc.c         | 60 +++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/psci-relay.c          |  4 +-
 5 files changed, 100 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/memory.h
 create mode 100644 arch/arm64/kvm/hyp/nvhe/early_alloc.c

Comments

Will Deacon Feb. 1, 2021, 7 p.m. UTC | #1
On Fri, Jan 08, 2021 at 12:15:08PM +0000, Quentin Perret wrote:
> diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
> new file mode 100644
> index 000000000000..de4c45662970
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
> @@ -0,0 +1,60 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2020 Google LLC
> + * Author: Quentin Perret <qperret@google.com>
> + */
> +
> +#include <asm/kvm_pgtable.h>
> +
> +#include <nvhe/memory.h>
> +
> +struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
> +s64 __ro_after_init hyp_physvirt_offset;
> +
> +static unsigned long base;
> +static unsigned long end;
> +static unsigned long cur;
> +
> +unsigned long hyp_early_alloc_nr_pages(void)
> +{
> +	return (cur - base) >> PAGE_SHIFT;
> +}

nit: but I find this function name confusing (it's returning the number of
_allocated_ pages, not the number of _free_ pages!). How about something
like hyp_early_alloc_size() to match hyp_s1_pgtable_size() which you add
later? [and move the shift out to the caller]?

> +
> +extern void clear_page(void *to);

Stick this in a header?

> +
> +void *hyp_early_alloc_contig(unsigned int nr_pages)

I think order might make more sense, or do you need to allocate
non-power-of-2 batches of pages?

> +{
> +	unsigned long ret = cur, i, p;
> +
> +	if (!nr_pages)
> +		return NULL;
> +
> +	cur += nr_pages << PAGE_SHIFT;
> +	if (cur > end) {

This would mean that concurrent hyp_early_alloc_nr_pages() would transiently
give the wrong answer. Might be worth sticking the locking expectations with
the function prototypes.

That said, maybe it would be better to write this check as:

	if (end - cur < (nr_pages << PAGE_SHIFT))

as that also removes the need to worry about overflow if nr_pages is huge
(which would be a bug in the hypervisor, which we would then catch here).

> +		cur = ret;
> +		return NULL;
> +	}
> +
> +	for (i = 0; i < nr_pages; i++) {
> +		p = ret + (i << PAGE_SHIFT);
> +		clear_page((void *)(p));
> +	}
> +
> +	return (void *)ret;
> +}
> +
> +void *hyp_early_alloc_page(void *arg)
> +{
> +	return hyp_early_alloc_contig(1);
> +}
> +
> +void hyp_early_alloc_init(unsigned long virt, unsigned long size)
> +{
> +	base = virt;
> +	end = virt + size;
> +	cur = virt;

nit: base = cur = virt;

Will
Quentin Perret Feb. 2, 2021, 9:44 a.m. UTC | #2
On Monday 01 Feb 2021 at 19:00:08 (+0000), Will Deacon wrote:
> On Fri, Jan 08, 2021 at 12:15:08PM +0000, Quentin Perret wrote:
> > diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
> > new file mode 100644
> > index 000000000000..de4c45662970
> > --- /dev/null
> > +++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
> > @@ -0,0 +1,60 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2020 Google LLC
> > + * Author: Quentin Perret <qperret@google.com>
> > + */
> > +
> > +#include <asm/kvm_pgtable.h>
> > +
> > +#include <nvhe/memory.h>
> > +
> > +struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
> > +s64 __ro_after_init hyp_physvirt_offset;
> > +
> > +static unsigned long base;
> > +static unsigned long end;
> > +static unsigned long cur;
> > +
> > +unsigned long hyp_early_alloc_nr_pages(void)
> > +{
> > +	return (cur - base) >> PAGE_SHIFT;
> > +}
> 
> nit: but I find this function name confusing (it's returning the number of
> _allocated_ pages, not the number of _free_ pages!). How about something
> like hyp_early_alloc_size() to match hyp_s1_pgtable_size() which you add
> later? [and move the shift out to the caller]?

Works for me.

> > +extern void clear_page(void *to);
> 
> Stick this in a header?

Right, that, or perhaps just use asm/page.h directly -- I _think_ that
should work fine assuming with have the correct symbol aliasing in
place.

> > +
> > +void *hyp_early_alloc_contig(unsigned int nr_pages)
> 
> I think order might make more sense, or do you need to allocate
> non-power-of-2 batches of pages?

Indeed, I allocate page-aligned blobs of arbitrary size (e.g.
divide_memory_pool() in patch 16), so I prefer it that way.

> > +{
> > +	unsigned long ret = cur, i, p;
> > +
> > +	if (!nr_pages)
> > +		return NULL;
> > +
> > +	cur += nr_pages << PAGE_SHIFT;
> > +	if (cur > end) {
> 
> This would mean that concurrent hyp_early_alloc_nr_pages() would transiently
> give the wrong answer. Might be worth sticking the locking expectations with
> the function prototypes.

This is only called from a single CPU from a non-preemptible section, so
that is not a problem. But yes, I'll stick a comment.

> That said, maybe it would be better to write this check as:
> 
> 	if (end - cur < (nr_pages << PAGE_SHIFT))
> 
> as that also removes the need to worry about overflow if nr_pages is huge
> (which would be a bug in the hypervisor, which we would then catch here).

Sounds good.

> > +		cur = ret;
> > +		return NULL;
> > +	}
> > +
> > +	for (i = 0; i < nr_pages; i++) {
> > +		p = ret + (i << PAGE_SHIFT);
> > +		clear_page((void *)(p));
> > +	}
> > +
> > +	return (void *)ret;
> > +}
> > +
> > +void *hyp_early_alloc_page(void *arg)
> > +{
> > +	return hyp_early_alloc_contig(1);
> > +}
> > +
> > +void hyp_early_alloc_init(unsigned long virt, unsigned long size)
> > +{
> > +	base = virt;
> > +	end = virt + size;
> > +	cur = virt;
> 
> nit: base = cur = virt;

Ack.

Thanks for the review,
Quentin
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
new file mode 100644
index 000000000000..68ce2bf9a718
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
@@ -0,0 +1,14 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_EARLY_ALLOC_H
+#define __KVM_HYP_EARLY_ALLOC_H
+
+#include <asm/kvm_pgtable.h>
+
+void hyp_early_alloc_init(void *virt, unsigned long size);
+unsigned long hyp_early_alloc_nr_pages(void);
+void *hyp_early_alloc_page(void *arg);
+void *hyp_early_alloc_contig(unsigned int nr_pages);
+
+extern struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
+
+#endif /* __KVM_HYP_EARLY_ALLOC_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
new file mode 100644
index 000000000000..64c44c142c95
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_MEMORY_H
+#define __KVM_HYP_MEMORY_H
+
+#include <asm/page.h>
+
+#include <linux/types.h>
+
+extern s64 hyp_physvirt_offset;
+
+#define __hyp_pa(virt)	((phys_addr_t)(virt) + hyp_physvirt_offset)
+#define __hyp_va(virt)	((void *)((phys_addr_t)(virt) - hyp_physvirt_offset))
+
+static inline void *hyp_phys_to_virt(phys_addr_t phys)
+{
+	return __hyp_va(phys);
+}
+
+static inline phys_addr_t hyp_virt_to_phys(void *addr)
+{
+	return __hyp_pa(addr);
+}
+
+#endif /* __KVM_HYP_MEMORY_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 590fdefb42dd..1fc0684a7678 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -10,7 +10,7 @@  lib-objs := clear_page.o copy_page.o memcpy.o memset.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
-	 hyp-main.o hyp-smp.o psci-relay.o
+	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o
 obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
new file mode 100644
index 000000000000..de4c45662970
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -0,0 +1,60 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <asm/kvm_pgtable.h>
+
+#include <nvhe/memory.h>
+
+struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
+s64 __ro_after_init hyp_physvirt_offset;
+
+static unsigned long base;
+static unsigned long end;
+static unsigned long cur;
+
+unsigned long hyp_early_alloc_nr_pages(void)
+{
+	return (cur - base) >> PAGE_SHIFT;
+}
+
+extern void clear_page(void *to);
+
+void *hyp_early_alloc_contig(unsigned int nr_pages)
+{
+	unsigned long ret = cur, i, p;
+
+	if (!nr_pages)
+		return NULL;
+
+	cur += nr_pages << PAGE_SHIFT;
+	if (cur > end) {
+		cur = ret;
+		return NULL;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		p = ret + (i << PAGE_SHIFT);
+		clear_page((void *)(p));
+	}
+
+	return (void *)ret;
+}
+
+void *hyp_early_alloc_page(void *arg)
+{
+	return hyp_early_alloc_contig(1);
+}
+
+void hyp_early_alloc_init(unsigned long virt, unsigned long size)
+{
+	base = virt;
+	end = virt + size;
+	cur = virt;
+
+	hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
+	hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
+	hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index e3947846ffcb..bdd8054bce4c 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -11,6 +11,7 @@ 
 #include <linux/kvm_host.h>
 #include <uapi/linux/psci.h>
 
+#include <nvhe/memory.h>
 #include <nvhe/trap_handler.h>
 
 void kvm_hyp_cpu_entry(unsigned long r0);
@@ -20,9 +21,6 @@  void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
 
 /* Config options set by the host. */
 struct kvm_host_psci_config __ro_after_init kvm_host_psci_config;
-s64 __ro_after_init hyp_physvirt_offset;
-
-#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
 
 #define INVALID_CPU_ID	UINT_MAX