
[v2,14/24] KVM: arm64: Add pcpu fixmap infrastructure at EL2

Message ID 20220630135747.26983-15-will@kernel.org (mailing list archive)
State New, archived
Series: KVM: arm64: Introduce pKVM shadow state at EL2

Commit Message

Will Deacon June 30, 2022, 1:57 p.m. UTC
From: Quentin Perret <qperret@google.com>

We will soon need to temporarily map pages into the hypervisor stage-1
in nVHE protected mode. To do this efficiently, let's introduce a
per-cpu fixmap that allows us to map a single page without taking any
locks or allocating memory.
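
For illustration only (not part of this patch), a caller could use the
fixmap along the lines of the sketch below; the function name and the
page scrubbing are made up for this example, the real users are
introduced in subsequent patches:

/*
 * Hypothetical usage sketch: temporarily map a page into the
 * hypervisor stage-1, scrub it, then drop the mapping again.
 */
static int hyp_scrub_page(phys_addr_t phys)
{
	void *va = hyp_fixmap_map(phys);

	if (!va)
		return -EINVAL;

	memset(va, 0, PAGE_SIZE);

	return hyp_fixmap_unmap();
}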

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +
 arch/arm64/kvm/hyp/include/nvhe/mm.h          |  4 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         |  1 -
 arch/arm64/kvm/hyp/nvhe/mm.c                  | 72 +++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/setup.c               |  4 ++
 5 files changed, 82 insertions(+), 1 deletion(-)

Comments

Vincent Donnefort July 19, 2022, 1:30 p.m. UTC | #1
>  static struct hyp_pool host_s2_pool;
> diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
> index d3a3b47181de..17d689483ec4 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mm.c
> @@ -14,6 +14,7 @@
>  #include <nvhe/early_alloc.h>
>  #include <nvhe/gfp.h>
>  #include <nvhe/memory.h>
> +#include <nvhe/mem_protect.h>
>  #include <nvhe/mm.h>
>  #include <nvhe/spinlock.h>
>  
> @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
>  unsigned int hyp_memblock_nr;
>  
>  static u64 __io_map_base;
> +static DEFINE_PER_CPU(void *, hyp_fixmap_base);
>  
>  static int __pkvm_create_mappings(unsigned long start, unsigned long size,
>  				  unsigned long phys, enum kvm_pgtable_prot prot)
> @@ -212,6 +214,76 @@ int hyp_map_vectors(void)
>  	return 0;
>  }
>  
> +void *hyp_fixmap_map(phys_addr_t phys)
> +{
> +	void *addr = *this_cpu_ptr(&hyp_fixmap_base);
> +	int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE,
> +				      phys, PAGE_HYP);
> +	return ret ? NULL : addr;
> +}
> +
> +int hyp_fixmap_unmap(void)
> +{
> +	void *addr = *this_cpu_ptr(&hyp_fixmap_base);
> +	int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE);
> +
> +	return (ret != PAGE_SIZE) ? -EINVAL : 0;
> +}
> +

I probably missed something, but as the page-table pages for this mapping
are pinned, it seems impossible (currently) for this call to fail. Maybe a
WARN_ON would be more appropriate, especially since the callers in the
subsequent patches do not seem to check this function's return value?
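
The suggested change would presumably look something like this
hypothetical variant (not part of the posted patch):

/*
 * Hypothetical variant following the comment above: the unmap cannot
 * fail once the page-table pages are pinned, so warn instead of
 * returning an error that nobody checks.
 */
void hyp_fixmap_unmap(void)
{
	void *addr = *this_cpu_ptr(&hyp_fixmap_base);

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr,
				      PAGE_SIZE) != PAGE_SIZE);
}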

[...]
Quentin Perret July 19, 2022, 2:09 p.m. UTC | #2
On Tue, Jul 19, 2022 at 3:30 PM Vincent Donnefort <vdonnefort@google.com> wrote:
>
> >  static struct hyp_pool host_s2_pool;
> > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
> > index d3a3b47181de..17d689483ec4 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/mm.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c
> > @@ -14,6 +14,7 @@
> >  #include <nvhe/early_alloc.h>
> >  #include <nvhe/gfp.h>
> >  #include <nvhe/memory.h>
> > +#include <nvhe/mem_protect.h>
> >  #include <nvhe/mm.h>
> >  #include <nvhe/spinlock.h>
> >
> > @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
> >  unsigned int hyp_memblock_nr;
> >
> >  static u64 __io_map_base;
> > +static DEFINE_PER_CPU(void *, hyp_fixmap_base);
> >
> >  static int __pkvm_create_mappings(unsigned long start, unsigned long size,
> >                                 unsigned long phys, enum kvm_pgtable_prot prot)
> > @@ -212,6 +214,76 @@ int hyp_map_vectors(void)
> >       return 0;
> >  }
> >
> > +void *hyp_fixmap_map(phys_addr_t phys)
> > +{
> > +     void *addr = *this_cpu_ptr(&hyp_fixmap_base);
> > +     int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE,
> > +                                   phys, PAGE_HYP);
> > +     return ret ? NULL : addr;
> > +}
> > +
> > +int hyp_fixmap_unmap(void)
> > +{
> > +     void *addr = *this_cpu_ptr(&hyp_fixmap_base);
> > +     int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE);
> > +
> > +     return (ret != PAGE_SIZE) ? -EINVAL : 0;
> > +}
> > +
>
> I probably missed something, but as the page-table pages for this mapping
> are pinned, it seems impossible (currently) for this call to fail. Maybe a
> WARN_ON would be more appropriate, especially since the callers in the
> subsequent patches do not seem to check this function's return value?

Right, I think that wouldn't hurt. And while looking at this, I
actually think we could get rid of these calls to the map/unmap
functions entirely by keeping pointers to the reserved PTEs, in
addition to the VAs they correspond to, directly in the percpu
table. That way we could manipulate the PTEs directly and avoid
unnecessary pgtable walks. Bits [63:1] can probably remain untouched,
and {un}mapping is then only a matter of flipping bit 0 in the PTE
(and a TLBI on the unmap path). I'll have a go at it.

Cheers,
Quentin
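
A rough sketch of that approach (purely illustrative, not part of this
series; the slot structure, the visibility of kvm_phys_to_pte() and
KVM_PHYS_INVALID to nVHE code, and the exact TLBI sequence are all
assumptions here):

struct hyp_fixmap_slot {
	u64 addr;
	kvm_pte_t *ptep;
};
static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots);

void *hyp_fixmap_map(phys_addr_t phys)
{
	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
	kvm_pte_t pte, *ptep = slot->ptep;

	/* Install the new output address and set the valid bit. */
	pte = *ptep;
	pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID);
	pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID;
	WRITE_ONCE(*ptep, pte);
	dsb(ishst);

	return (void *)slot->addr;
}

void hyp_fixmap_unmap(void)
{
	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);

	/* Clear the valid bit and invalidate the stale translation. */
	WRITE_ONCE(*slot->ptep, *slot->ptep & ~KVM_PTE_VALID);
	dsb(ishst);
	__tlbi_level(vale2is, __TLBI_VADDR((u64)slot->addr, 0),
		     KVM_PGTABLE_MAX_LEVELS - 1);
	dsb(ish);
	isb();
}
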
Quentin Perret July 19, 2022, 2:10 p.m. UTC | #3
On Tue, Jul 19, 2022 at 4:09 PM Quentin Perret <qperret@google.com> wrote:
>
> On Tue, Jul 19, 2022 at 3:30 PM Vincent Donnefort <vdonnefort@google.com> wrote:
> >
> > >  static struct hyp_pool host_s2_pool;
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
> > > index d3a3b47181de..17d689483ec4 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/mm.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/mm.c
> > > @@ -14,6 +14,7 @@
> > >  #include <nvhe/early_alloc.h>
> > >  #include <nvhe/gfp.h>
> > >  #include <nvhe/memory.h>
> > > +#include <nvhe/mem_protect.h>
> > >  #include <nvhe/mm.h>
> > >  #include <nvhe/spinlock.h>
> > >
> > > @@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
> > >  unsigned int hyp_memblock_nr;
> > >
> > >  static u64 __io_map_base;
> > > +static DEFINE_PER_CPU(void *, hyp_fixmap_base);
> > >
> > >  static int __pkvm_create_mappings(unsigned long start, unsigned long size,
> > >                                 unsigned long phys, enum kvm_pgtable_prot prot)
> > > @@ -212,6 +214,76 @@ int hyp_map_vectors(void)
> > >       return 0;
> > >  }
> > >
> > > +void *hyp_fixmap_map(phys_addr_t phys)
> > > +{
> > > +     void *addr = *this_cpu_ptr(&hyp_fixmap_base);
> > > +     int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE,
> > > +                                   phys, PAGE_HYP);
> > > +     return ret ? NULL : addr;
> > > +}
> > > +
> > > +int hyp_fixmap_unmap(void)
> > > +{
> > > +     void *addr = *this_cpu_ptr(&hyp_fixmap_base);
> > > +     int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE);
> > > +
> > > +     return (ret != PAGE_SIZE) ? -EINVAL : 0;
> > > +}
> > > +
> >
> > I probably missed something, but as the page-table pages for this mapping
> > are pinned, it seems impossible (currently) for this call to fail. Maybe a
> > WARN_ON would be more appropriate, especially since the callers in the
> > subsequent patches do not seem to check this function's return value?
>
> Right, I think that wouldn't hurt. And while looking at this, I
> actually think we could get rid of these calls to the map/unmap
> functions entirely by keeping pointers to the reserved PTEs, in
> addition to the VAs they correspond to, directly in the percpu
> table. That way we could manipulate the PTEs directly and avoid
> unnecessary pgtable walks. Bits [63:1] can probably remain untouched,

 Well, the address bits obviously need to change too, but the rest can stay.

> and {un}mapping is then only a matter of flipping bit 0 in the PTE
> (and a TLBI on the unmap path). I'll have a go at it.
>
> Cheers,
> Quentin

Patch

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 3a0817b5c739..d11d9d68a680 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -59,6 +59,8 @@  enum pkvm_component_id {
 	PKVM_ID_HYP,
 };
 
+extern unsigned long hyp_nr_cpus;
+
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
 int __pkvm_host_unshare_hyp(u64 pfn);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index b2ee6d5df55b..882c5711eda5 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -13,6 +13,10 @@ 
 extern struct kvm_pgtable pkvm_pgtable;
 extern hyp_spinlock_t pkvm_pgd_lock;
 
+int hyp_create_pcpu_fixmap(void);
+void *hyp_fixmap_map(phys_addr_t phys);
+int hyp_fixmap_unmap(void);
+
 int hyp_create_idmap(u32 hyp_va_bits);
 int hyp_map_vectors(void);
 int hyp_back_vmemmap(phys_addr_t back);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 9baf731736be..a0af23de2640 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -21,7 +21,6 @@ 
 
 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
 
-extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
 static struct hyp_pool host_s2_pool;
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index d3a3b47181de..17d689483ec4 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -14,6 +14,7 @@ 
 #include <nvhe/early_alloc.h>
 #include <nvhe/gfp.h>
 #include <nvhe/memory.h>
+#include <nvhe/mem_protect.h>
 #include <nvhe/mm.h>
 #include <nvhe/spinlock.h>
 
@@ -24,6 +25,7 @@  struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
 unsigned int hyp_memblock_nr;
 
 static u64 __io_map_base;
+static DEFINE_PER_CPU(void *, hyp_fixmap_base);
 
 static int __pkvm_create_mappings(unsigned long start, unsigned long size,
 				  unsigned long phys, enum kvm_pgtable_prot prot)
@@ -212,6 +214,76 @@  int hyp_map_vectors(void)
 	return 0;
 }
 
+void *hyp_fixmap_map(phys_addr_t phys)
+{
+	void *addr = *this_cpu_ptr(&hyp_fixmap_base);
+	int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE,
+				      phys, PAGE_HYP);
+	return ret ? NULL : addr;
+}
+
+int hyp_fixmap_unmap(void)
+{
+	void *addr = *this_cpu_ptr(&hyp_fixmap_base);
+	int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE);
+
+	return (ret != PAGE_SIZE) ? -EINVAL : 0;
+}
+
+static int __pin_pgtable_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+			    enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+	if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1)
+		return -EINVAL;
+	hyp_page_ref_inc(hyp_virt_to_page(ptep));
+
+	return 0;
+}
+
+static int hyp_pin_pgtable_pages(u64 addr)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= __pin_pgtable_cb,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+	};
+
+	return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
+}
+
+int hyp_create_pcpu_fixmap(void)
+{
+	unsigned long addr, i;
+	int ret;
+
+	for (i = 0; i < hyp_nr_cpus; i++) {
+		ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr);
+		if (ret)
+			return ret;
+
+		/*
+		 * Create a dummy mapping, to get the intermediate page-table
+		 * pages allocated, then take a reference on the last level
+		 * page to keep it around at all times.
+		 */
+		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
+					  __hyp_pa(__hyp_bss_start), PAGE_HYP);
+		if (ret)
+			return ret;
+
+		ret = hyp_pin_pgtable_pages(addr);
+		if (ret)
+			return ret;
+
+		ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, PAGE_SIZE);
+		if (ret != PAGE_SIZE)
+			return -EINVAL;
+
+		*per_cpu_ptr(&hyp_fixmap_base, i) = (void *)addr;
+	}
+
+	return 0;
+}
+
 int hyp_create_idmap(u32 hyp_va_bits)
 {
 	unsigned long start, end;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index fb0eff15a89f..3f689ffb2693 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -321,6 +321,10 @@  void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	ret = hyp_create_pcpu_fixmap();
+	if (ret)
+		goto out;
+
 	hyp_shadow_table_init(shadow_table_base);
 out:
 	/*