Message ID | 20241216175803.2716565-19-qperret@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | KVM: arm64: Non-protected guest stage-2 support for pKVM | expand |
On Mon, 16 Dec 2024 at 17:58, Quentin Perret <qperret@google.com> wrote: > > Introduce the KVM_PGT_S2() helper macro to allow switching from the > traditional pgtable code to the pKVM version easily in mmu.c. The cost > of this 'indirection' is expected to be very minimal due to > is_protected_kvm_enabled() being backed by a static key. > > With this, everything is in place to allow the delegation of > non-protected guest stage-2 page-tables to pKVM, so let's stop using the > host's kvm_s2_mmu from EL2 and enjoy the ride. > > Signed-off-by: Quentin Perret <qperret@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Cheers, /fuad > --- > arch/arm64/include/asm/kvm_mmu.h | 16 +++++ > arch/arm64/kvm/arm.c | 9 ++- > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 - > arch/arm64/kvm/mmu.c | 107 +++++++++++++++++++++-------- > 4 files changed, 101 insertions(+), 33 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h > index 66d93e320ec8..d116ab4230e8 100644 > --- a/arch/arm64/include/asm/kvm_mmu.h > +++ b/arch/arm64/include/asm/kvm_mmu.h > @@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) > return &kvm->arch.mmu != mmu; > } > > +static inline void kvm_fault_lock(struct kvm *kvm) > +{ > + if (is_protected_kvm_enabled()) > + write_lock(&kvm->mmu_lock); > + else > + read_lock(&kvm->mmu_lock); > +} > + > +static inline void kvm_fault_unlock(struct kvm *kvm) > +{ > + if (is_protected_kvm_enabled()) > + write_unlock(&kvm->mmu_lock); > + else > + read_unlock(&kvm->mmu_lock); > +} > + > #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS > void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); > #else > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index 55cc62b2f469..9bcbc7b8ed38 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -502,7 +502,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) > > void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > { > - 
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); > + if (!is_protected_kvm_enabled()) > + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); > + else > + free_hyp_memcache(&vcpu->arch.pkvm_memcache); > kvm_timer_vcpu_terminate(vcpu); > kvm_pmu_vcpu_destroy(vcpu); > kvm_vgic_vcpu_destroy(vcpu); > @@ -574,6 +577,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > struct kvm_s2_mmu *mmu; > int *last_ran; > > + if (is_protected_kvm_enabled()) > + goto nommu; > + > if (vcpu_has_nv(vcpu)) > kvm_vcpu_load_hw_mmu(vcpu); > > @@ -594,6 +600,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > *last_ran = vcpu->vcpu_idx; > } > > +nommu: > vcpu->cpu = cpu; > > kvm_vgic_load(vcpu); > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c > index 130f5f23bcb5..258d572eed62 100644 > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c > @@ -103,8 +103,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) > /* Limit guest vector length to the maximum supported by the host. */ > hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); > > - hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; > - > hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; > hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); > hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 641e4fec1659..7c2995cb4577 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -15,6 +15,7 @@ > #include <asm/kvm_arm.h> > #include <asm/kvm_mmu.h> > #include <asm/kvm_pgtable.h> > +#include <asm/kvm_pkvm.h> > #include <asm/kvm_ras.h> > #include <asm/kvm_asm.h> > #include <asm/kvm_emulate.h> > @@ -31,6 +32,14 @@ static phys_addr_t __ro_after_init hyp_idmap_vector; > > static unsigned long __ro_after_init io_map_base; > > +#define KVM_PGT_S2(fn, ...) 
\ > + ({ \ > + typeof(kvm_pgtable_stage2_ ## fn) *__fn = kvm_pgtable_stage2_ ## fn; \ > + if (is_protected_kvm_enabled()) \ > + __fn = pkvm_pgtable_ ## fn; \ > + __fn(__VA_ARGS__); \ > + }) > + > static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end, > phys_addr_t size) > { > @@ -147,7 +156,7 @@ static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr, > return -EINVAL; > > next = __stage2_range_addr_end(addr, end, chunk_size); > - ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache); > + ret = KVM_PGT_S2(split, pgt, addr, next - addr, cache); > if (ret) > break; > } while (addr = next, addr != end); > @@ -168,15 +177,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) > */ > int kvm_arch_flush_remote_tlbs(struct kvm *kvm) > { > - kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); > + if (is_protected_kvm_enabled()) > + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); > + else > + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); > return 0; > } > > int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, > gfn_t gfn, u64 nr_pages) > { > - kvm_tlb_flush_vmid_range(&kvm->arch.mmu, > - gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); > + u64 size = nr_pages << PAGE_SHIFT; > + u64 addr = gfn << PAGE_SHIFT; > + > + if (is_protected_kvm_enabled()) > + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); > + else > + kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size); > return 0; > } > > @@ -225,7 +242,7 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head) > void *pgtable = page_to_virt(page); > s8 level = page_private(page); > > - kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level); > + KVM_PGT_S2(free_unlinked, &kvm_s2_mm_ops, pgtable, level); > } > > static void stage2_free_unlinked_table(void *addr, s8 level) > @@ -280,6 +297,11 @@ static void invalidate_icache_guest_page(void *va, size_t size) > __invalidate_icache_guest_page(va, size); > } > > 
+static int kvm_s2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) > +{ > + return KVM_PGT_S2(unmap, pgt, addr, size); > +} > + > /* > * Unmapping vs dcache management: > * > @@ -324,8 +346,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 > > lockdep_assert_held_write(&kvm->mmu_lock); > WARN_ON(size & ~PAGE_MASK); > - WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, > - may_block)); > + WARN_ON(stage2_apply_range(mmu, start, end, kvm_s2_unmap, may_block)); > } > > void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, > @@ -334,9 +355,14 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, > __unmap_stage2_range(mmu, start, size, may_block); > } > > +static int kvm_s2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) > +{ > + return KVM_PGT_S2(flush, pgt, addr, size); > +} > + > void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) > { > - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush); > + stage2_apply_range_resched(mmu, addr, end, kvm_s2_flush); > } > > static void stage2_flush_memslot(struct kvm *kvm, > @@ -942,10 +968,14 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t > return -ENOMEM; > > mmu->arch = &kvm->arch; > - err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops); > + err = KVM_PGT_S2(init, pgt, mmu, &kvm_s2_mm_ops); > if (err) > goto out_free_pgtable; > > + mmu->pgt = pgt; > + if (is_protected_kvm_enabled()) > + return 0; > + > mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); > if (!mmu->last_vcpu_ran) { > err = -ENOMEM; > @@ -959,7 +989,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t > mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; > mmu->split_page_cache.gfp_zero = __GFP_ZERO; > > - mmu->pgt = pgt; > mmu->pgd_phys = __pa(pgt->pgd); > > if (kvm_is_nested_s2_mmu(kvm, mmu)) > @@ -968,7 +997,7 
@@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t > return 0; > > out_destroy_pgtable: > - kvm_pgtable_stage2_destroy(pgt); > + KVM_PGT_S2(destroy, pgt); > out_free_pgtable: > kfree(pgt); > return err; > @@ -1065,7 +1094,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > write_unlock(&kvm->mmu_lock); > > if (pgt) { > - kvm_pgtable_stage2_destroy(pgt); > + KVM_PGT_S2(destroy, pgt); > kfree(pgt); > } > } > @@ -1082,9 +1111,11 @@ static void *hyp_mc_alloc_fn(void *unused) > > void free_hyp_memcache(struct kvm_hyp_memcache *mc) > { > - if (is_protected_kvm_enabled()) > - __free_hyp_memcache(mc, hyp_mc_free_fn, > - kvm_host_va, NULL); > + if (!is_protected_kvm_enabled()) > + return; > + > + kfree(mc->mapping); > + __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL); > } > > int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) > @@ -1092,6 +1123,12 @@ int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) > if (!is_protected_kvm_enabled()) > return 0; > > + if (!mc->mapping) { > + mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT); > + if (!mc->mapping) > + return -ENOMEM; > + } > + > return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, > kvm_host_pa, NULL); > } > @@ -1130,8 +1167,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > break; > > write_lock(&kvm->mmu_lock); > - ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, > - &cache, 0); > + ret = KVM_PGT_S2(map, pgt, addr, PAGE_SIZE, pa, prot, &cache, 0); > write_unlock(&kvm->mmu_lock); > if (ret) > break; > @@ -1143,6 +1179,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > return ret; > } > > +static int kvm_s2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) > +{ > + return KVM_PGT_S2(wrprotect, pgt, addr, size); > +} > /** > * kvm_stage2_wp_range() - write protect stage2 memory region range > * @mmu: The KVM stage-2 MMU pointer > @@ 
-1151,7 +1191,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > */ > void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) > { > - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect); > + stage2_apply_range_resched(mmu, addr, end, kvm_s2_wrprotect); > } > > /** > @@ -1442,9 +1482,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > unsigned long mmu_seq; > phys_addr_t ipa = fault_ipa; > struct kvm *kvm = vcpu->kvm; > - struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; > struct vm_area_struct *vma; > short vma_shift; > + void *memcache; > gfn_t gfn; > kvm_pfn_t pfn; > bool logging_active = memslot_is_logging(memslot); > @@ -1472,8 +1512,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > * and a write fault needs to collapse a block entry into a table. > */ > if (!fault_is_perm || (logging_active && write_fault)) { > - ret = kvm_mmu_topup_memory_cache(memcache, > - kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); > + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); > + > + if (!is_protected_kvm_enabled()) { > + memcache = &vcpu->arch.mmu_page_cache; > + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); > + } else { > + memcache = &vcpu->arch.pkvm_memcache; > + ret = topup_hyp_memcache(memcache, min_pages); > + } > if (ret) > return ret; > } > @@ -1494,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > * logging_active is guaranteed to never be true for VM_PFNMAP > * memslots. 
> */ > - if (logging_active) { > + if (logging_active || is_protected_kvm_enabled()) { > force_pte = true; > vma_shift = PAGE_SHIFT; > } else { > @@ -1634,7 +1681,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > prot |= kvm_encode_nested_level(nested); > } > > - read_lock(&kvm->mmu_lock); > + kvm_fault_lock(kvm); > pgt = vcpu->arch.hw_mmu->pgt; > if (mmu_invalidate_retry(kvm, mmu_seq)) { > ret = -EAGAIN; > @@ -1696,16 +1743,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > * PTE, which will be preserved. > */ > prot &= ~KVM_NV_GUEST_MAP_SZ; > - ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot, flags); > + ret = KVM_PGT_S2(relax_perms, pgt, fault_ipa, prot, flags); > } else { > - ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, > + ret = KVM_PGT_S2(map, pgt, fault_ipa, vma_pagesize, > __pfn_to_phys(pfn), prot, > memcache, flags); > } > > out_unlock: > kvm_release_faultin_page(kvm, page, !!ret, writable); > - read_unlock(&kvm->mmu_lock); > + kvm_fault_unlock(kvm); > > /* Mark the page dirty only if the fault is handled successfully */ > if (writable && !ret) > @@ -1724,7 +1771,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) > > read_lock(&vcpu->kvm->mmu_lock); > mmu = vcpu->arch.hw_mmu; > - kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa, flags); > + KVM_PGT_S2(mkyoung, mmu->pgt, fault_ipa, flags); > read_unlock(&vcpu->kvm->mmu_lock); > } > > @@ -1764,7 +1811,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) > } > > /* Falls between the IPA range and the PARange? 
*/ > - if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { > + if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { > fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); > > if (is_iabt) > @@ -1930,7 +1977,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) > if (!kvm->arch.mmu.pgt) > return false; > > - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, > + return KVM_PGT_S2(test_clear_young, kvm->arch.mmu.pgt, > range->start << PAGE_SHIFT, > size, true); > /* > @@ -1946,7 +1993,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) > if (!kvm->arch.mmu.pgt) > return false; > > - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, > + return KVM_PGT_S2(test_clear_young, kvm->arch.mmu.pgt, > range->start << PAGE_SHIFT, > size, false); > } > -- > 2.47.1.613.gc27f4b7a9f-goog >
On Mon, 16 Dec 2024 17:58:03 +0000, Quentin Perret <qperret@google.com> wrote: > > Introduce the KVM_PGT_S2() helper macro to allow switching from the > traditional pgtable code to the pKVM version easily in mmu.c. The cost > of this 'indirection' is expected to be very minimal due to > is_protected_kvm_enabled() being backed by a static key. > > With this, everything is in place to allow the delegation of > non-protected guest stage-2 page-tables to pKVM, so let's stop using the > host's kvm_s2_mmu from EL2 and enjoy the ride. > > Signed-off-by: Quentin Perret <qperret@google.com> > --- > arch/arm64/include/asm/kvm_mmu.h | 16 +++++ > arch/arm64/kvm/arm.c | 9 ++- > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 - > arch/arm64/kvm/mmu.c | 107 +++++++++++++++++++++-------- > 4 files changed, 101 insertions(+), 33 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h > index 66d93e320ec8..d116ab4230e8 100644 > --- a/arch/arm64/include/asm/kvm_mmu.h > +++ b/arch/arm64/include/asm/kvm_mmu.h > @@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) > return &kvm->arch.mmu != mmu; > } > > +static inline void kvm_fault_lock(struct kvm *kvm) > +{ > + if (is_protected_kvm_enabled()) > + write_lock(&kvm->mmu_lock); > + else > + read_lock(&kvm->mmu_lock); > +} > + > +static inline void kvm_fault_unlock(struct kvm *kvm) > +{ > + if (is_protected_kvm_enabled()) > + write_unlock(&kvm->mmu_lock); > + else > + read_unlock(&kvm->mmu_lock); > +} > + > #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS > void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); > #else > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index 55cc62b2f469..9bcbc7b8ed38 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -502,7 +502,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) > > void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > { > - kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); > + if 
(!is_protected_kvm_enabled()) > + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); > + else > + free_hyp_memcache(&vcpu->arch.pkvm_memcache); > kvm_timer_vcpu_terminate(vcpu); > kvm_pmu_vcpu_destroy(vcpu); > kvm_vgic_vcpu_destroy(vcpu); > @@ -574,6 +577,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > struct kvm_s2_mmu *mmu; > int *last_ran; > > + if (is_protected_kvm_enabled()) > + goto nommu; > + > if (vcpu_has_nv(vcpu)) > kvm_vcpu_load_hw_mmu(vcpu); > > @@ -594,6 +600,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > *last_ran = vcpu->vcpu_idx; > } > > +nommu: > vcpu->cpu = cpu; > > kvm_vgic_load(vcpu); > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c > index 130f5f23bcb5..258d572eed62 100644 > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c > @@ -103,8 +103,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) > /* Limit guest vector length to the maximum supported by the host. */ > hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); > > - hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; > - > hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; > hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); > hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 641e4fec1659..7c2995cb4577 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -15,6 +15,7 @@ > #include <asm/kvm_arm.h> > #include <asm/kvm_mmu.h> > #include <asm/kvm_pgtable.h> > +#include <asm/kvm_pkvm.h> > #include <asm/kvm_ras.h> > #include <asm/kvm_asm.h> > #include <asm/kvm_emulate.h> > @@ -31,6 +32,14 @@ static phys_addr_t __ro_after_init hyp_idmap_vector; > > static unsigned long __ro_after_init io_map_base; > > +#define KVM_PGT_S2(fn, ...) 
\ > + ({ \ > + typeof(kvm_pgtable_stage2_ ## fn) *__fn = kvm_pgtable_stage2_ ## fn; \ > + if (is_protected_kvm_enabled()) \ > + __fn = pkvm_pgtable_ ## fn; \ > + __fn(__VA_ARGS__); \ > + }) > + My gripe with this is that it makes it much harder to follow what is happening by using tags (ctags, etags, whatever). I ended up with the hack below, which is super ugly, but preserves the tagging functionality for non-pKVM. I'll scratch my head to find something more elegant... M. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 76a8b70176a6c..b9b9acb685d8f 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -143,21 +143,21 @@ struct pkvm_mapping { u64 pfn; }; -int pkvm_pgtable_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); -void pkvm_pgtable_destroy(struct kvm_pgtable *pgt); -int pkvm_pgtable_map(struct kvm_pgtable *pgt, u64 addr, u64 size, +int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); +void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags); -int pkvm_pgtable_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); -int pkvm_pgtable_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); -int pkvm_pgtable_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); -bool pkvm_pgtable_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold); -int pkvm_pgtable_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, +int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); +int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); +int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); +bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable 
*pgt, u64 addr, u64 size, bool mkold); +int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags); -void pkvm_pgtable_mkyoung(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_walk_flags flags); -int pkvm_pgtable_split(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_mmu_memory_cache *mc); -void pkvm_pgtable_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); -kvm_pte_t *pkvm_pgtable_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, +void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_walk_flags flags); +int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_mmu_memory_cache *mc); +void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); +kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte); #endif /* __ARM64_KVM_PKVM_H__ */ diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7c2995cb45773..4b9153468a327 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -32,12 +32,13 @@ static phys_addr_t __ro_after_init hyp_idmap_vector; static unsigned long __ro_after_init io_map_base; -#define KVM_PGT_S2(fn, ...) \ - ({ \ - typeof(kvm_pgtable_stage2_ ## fn) *__fn = kvm_pgtable_stage2_ ## fn; \ - if (is_protected_kvm_enabled()) \ - __fn = pkvm_pgtable_ ## fn; \ - __fn(__VA_ARGS__); \ +#define __S2(fn, ...) 
\ + ({ \ + typeof(fn) *__fn = fn; \ + /* upgrade the function name from kvm_* to pkvm_* */ \ + if (is_protected_kvm_enabled()) \ + __fn = p ## fn; \ + __fn(__VA_ARGS__); \ }) static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end, @@ -156,7 +157,7 @@ static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr, return -EINVAL; next = __stage2_range_addr_end(addr, end, chunk_size); - ret = KVM_PGT_S2(split, pgt, addr, next - addr, cache); + ret = __S2(kvm_pgtable_stage2_split, pgt, addr, next - addr, cache); if (ret) break; } while (addr = next, addr != end); @@ -242,7 +243,7 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head) void *pgtable = page_to_virt(page); s8 level = page_private(page); - KVM_PGT_S2(free_unlinked, &kvm_s2_mm_ops, pgtable, level); + __S2(kvm_pgtable_stage2_free_unlinked, &kvm_s2_mm_ops, pgtable, level); } static void stage2_free_unlinked_table(void *addr, s8 level) @@ -299,7 +300,7 @@ static void invalidate_icache_guest_page(void *va, size_t size) static int kvm_s2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { - return KVM_PGT_S2(unmap, pgt, addr, size); + return __S2(kvm_pgtable_stage2_unmap, pgt, addr, size); } /* @@ -357,7 +358,7 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, static int kvm_s2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) { - return KVM_PGT_S2(flush, pgt, addr, size); + return __S2(kvm_pgtable_stage2_flush, pgt, addr, size); } void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) @@ -968,7 +969,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return -ENOMEM; mmu->arch = &kvm->arch; - err = KVM_PGT_S2(init, pgt, mmu, &kvm_s2_mm_ops); + err = __S2(kvm_pgtable_stage2_init, pgt, mmu, &kvm_s2_mm_ops); if (err) goto out_free_pgtable; @@ -997,7 +998,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - 
KVM_PGT_S2(destroy, pgt); + __S2(kvm_pgtable_stage2_destroy, pgt); out_free_pgtable: kfree(pgt); return err; @@ -1094,7 +1095,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { - KVM_PGT_S2(destroy, pgt); + __S2(kvm_pgtable_stage2_destroy, pgt); kfree(pgt); } } @@ -1167,7 +1168,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, break; write_lock(&kvm->mmu_lock); - ret = KVM_PGT_S2(map, pgt, addr, PAGE_SIZE, pa, prot, &cache, 0); + ret = __S2(kvm_pgtable_stage2_map, pgt, addr, PAGE_SIZE, pa, prot, &cache, 0); write_unlock(&kvm->mmu_lock); if (ret) break; @@ -1181,7 +1182,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, static int kvm_s2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { - return KVM_PGT_S2(wrprotect, pgt, addr, size); + return __S2(kvm_pgtable_stage2_wrprotect, pgt, addr, size); } /** * kvm_stage2_wp_range() - write protect stage2 memory region range @@ -1743,9 +1744,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * PTE, which will be preserved. 
*/ prot &= ~KVM_NV_GUEST_MAP_SZ; - ret = KVM_PGT_S2(relax_perms, pgt, fault_ipa, prot, flags); + ret = __S2(kvm_pgtable_stage2_relax_perms, pgt, fault_ipa, prot, flags); } else { - ret = KVM_PGT_S2(map, pgt, fault_ipa, vma_pagesize, + ret = __S2(kvm_pgtable_stage2_map, pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, memcache, flags); } @@ -1771,7 +1772,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) read_lock(&vcpu->kvm->mmu_lock); mmu = vcpu->arch.hw_mmu; - KVM_PGT_S2(mkyoung, mmu->pgt, fault_ipa, flags); + __S2(kvm_pgtable_stage2_mkyoung, mmu->pgt, fault_ipa, flags); read_unlock(&vcpu->kvm->mmu_lock); } @@ -1977,7 +1978,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.mmu.pgt) return false; - return KVM_PGT_S2(test_clear_young, kvm->arch.mmu.pgt, + return __S2(kvm_pgtable_stage2_test_clear_young, kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, true); /* @@ -1993,7 +1994,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.mmu.pgt) return false; - return KVM_PGT_S2(test_clear_young, kvm->arch.mmu.pgt, + return __S2(kvm_pgtable_stage2_test_clear_young, kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, false); } diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 9de9159afa5a1..37d6494d0fd87 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -317,7 +317,7 @@ static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn) break; \ else -int pkvm_pgtable_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops) +int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops) { pgt->pkvm_mappings = RB_ROOT; pgt->mmu = mmu; @@ -325,7 +325,7 @@ int pkvm_pgtable_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kv return 0; } -void pkvm_pgtable_destroy(struct kvm_pgtable *pgt) +void pkvm_pgtable_stage2_destroy(struct 
kvm_pgtable *pgt) { struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); pkvm_handle_t handle = kvm->arch.pkvm.handle; @@ -345,7 +345,7 @@ void pkvm_pgtable_destroy(struct kvm_pgtable *pgt) } } -int pkvm_pgtable_map(struct kvm_pgtable *pgt, u64 addr, u64 size, +int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags) { @@ -375,7 +375,7 @@ int pkvm_pgtable_map(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -int pkvm_pgtable_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); pkvm_handle_t handle = kvm->arch.pkvm.handle; @@ -394,7 +394,7 @@ int pkvm_pgtable_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) return ret; } -int pkvm_pgtable_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) +int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); pkvm_handle_t handle = kvm->arch.pkvm.handle; @@ -411,7 +411,7 @@ int pkvm_pgtable_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) return ret; } -int pkvm_pgtable_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) +int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) { struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); struct pkvm_mapping *mapping; @@ -423,7 +423,7 @@ int pkvm_pgtable_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) return 0; } -bool pkvm_pgtable_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold) +bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold) { struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); pkvm_handle_t handle = kvm->arch.pkvm.handle; @@ -438,30 +438,30 @@ bool pkvm_pgtable_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, return young; } -int pkvm_pgtable_relax_perms(struct kvm_pgtable *pgt, u64 
addr, enum kvm_pgtable_prot prot, +int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags) { return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot); } -void pkvm_pgtable_mkyoung(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_walk_flags flags) +void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_walk_flags flags) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT)); } -void pkvm_pgtable_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) +void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { WARN_ON_ONCE(1); } -kvm_pte_t *pkvm_pgtable_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, +kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte) { WARN_ON_ONCE(1); return NULL; } -int pkvm_pgtable_split(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_mmu_memory_cache *mc) +int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_mmu_memory_cache *mc) { WARN_ON_ONCE(1); return -EINVAL;
On Tuesday 17 Dec 2024 at 14:03:37 (+0000), Marc Zyngier wrote:
> My gripe with this is that it makes it much harder to follow what is
> happening by using tags (ctags, etags, whatever). I ended up with the
> hack below, which is super ugly, but preserves the tagging
> functionality for non-pKVM.

Ack.

> I'll scratch my head to find something more elegant...

I find your proposal pretty reasonable -- I had a few different ideas
but they were all really over-engineered, so I figured relying on a
naming convention was the simplest. And any divergence will be flagged
at compile time, so that shouldn't be too hard to maintain looking
forward.

The __S2 name isn't massively descriptive though. Maybe KVM_PGT_CALL()
or something? Thinking about it, this abstraction doesn't need to be
restricted to stage-2 stuff. We could most likely hide the
__pkvm_host_{un}share_hyp() logic behind a pkvm_pgtable_hyp_{un}map()
implementation in pkvm.c as well...
On Tue, 17 Dec 2024 14:31:35 +0000, Quentin Perret <qperret@google.com> wrote:
>
> On Tuesday 17 Dec 2024 at 14:03:37 (+0000), Marc Zyngier wrote:
> > My gripe with this is that it makes it much harder to follow what is
> > happening by using tags (ctags, etags, whatever). I ended up with the
> > hack below, which is super ugly, but preserves the tagging
> > functionality for non-pKVM.
>
> Ack.
>
> > I'll scratch my head to find something more elegant...
>
> I find your proposal pretty reasonable -- I had a few different ideas
> but they were all really over-engineered, so I figured relying on a
> naming convention was the simplest. And any divergence will be flagged
> at compile time, so that shouldn't be too hard to maintain looking
> forward.
>
> The __S2 name isn't massively descriptive though. Maybe KVM_PGT_CALL()
> or something? Thinking about it, this abstraction doesn't need to be
> restricted to stage-2 stuff. We could most likely hide the
> __pkvm_host_{un}share_hyp() logic behind a pkvm_pgtable_hyp_{un}map()
> implementation in pkvm.c as well...

Oh, I'm happy with *any* name. I just changed it to make sure any
missing occurrence would blow up.

And yes, if we can make that more uniform, I'm all for that.

Thanks,

M.
On Tuesday 17 Dec 2024 at 15:38:21 (+0000), Marc Zyngier wrote: > On Tue, 17 Dec 2024 14:31:35 +0000, > Quentin Perret <qperret@google.com> wrote: > > > > On Tuesday 17 Dec 2024 at 14:03:37 (+0000), Marc Zyngier wrote: > > > My gripe with this is that it makes it much harder to follow what is > > > happening by using tags (ctags, etags, whatever). I ended up with the > > > hack below, which is super ugly, but preserves the tagging > > > functionality for non-pKVM. > > > > Ack. > > > > > I'll scratch my head to find something more elegant... > > > > I find your proposal pretty reasonable -- I had a few different ideas > > but they were all really over-engineered, so I figured relying on a > > naming convention was the simplest. And any divergence will be flagged > > at compile time, so that shouldn't be too hard to maintain looking > > forward. > > > > The __S2 name isn't massively descriptive though. Maybe KVM_PGT_CALL() > > or something? Thinking about it, this abstraction doesn't need to be > > restricted to stage-2 stuff. We could most likely hide the > > __pkvm_host_{un}share_hyp() logic behind a pkvm_pgtable_hyp_{un}map() > > implementation in pkvm.c as well... > > Oh, I'm happy with *any* name. I just changed it to make sure any > missing occurrence would blow up. > > And yes, if we can make that more uniform, I'm all for that. I had a go at porting the hyp stage-1 code to the same logic and ended up with the diff below. It's not completely obvious it is much better than the existing code TBH. I ended up resorting to odd things like passing a NULL pgt to the pkvm_pgtable_hyp_*() functions and such. All the mess comes from the pKVM boot flow, where Linux originally creates the hyp stage-1 page-table, but then frees it after pKVM has initialized and switches to using hypercalls. None of this is needed for this series though, so I won't include that in v4. 
I'll post it separately once that series lands, and then we can decide if it's worth it, or if it should be done differently. diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d116ab4230e8..b35c909f4d0a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -152,8 +152,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> -int kvm_share_hyp(void *from, void *to); -void kvm_unshare_hyp(void *from, void *to); +void remove_hyp_mappings(void *from, void *to); int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int __create_hyp_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 65f988b6fe0d..db7851459ef3 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -143,6 +143,11 @@ struct pkvm_mapping { u64 pfn; }; +int pkvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops); +void pkvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); +int pkvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot); +u64 pkvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9bcbc7b8ed38..2dada891c199 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -183,7 +183,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_init_nested(kvm); - ret = kvm_share_hyp(kvm, kvm + 1); + ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) return ret; @@ -217,7 +217,7 @@ int kvm_arch_init_vm(struct kvm *kvm, 
unsigned long type) err_free_cpumask: free_cpumask_var(kvm->arch.supported_cpus); err_unshare_kvm: - kvm_unshare_hyp(kvm, kvm + 1); + remove_hyp_mappings(kvm, kvm + 1); return ret; } @@ -268,7 +268,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.sysreg_masks); kvm_destroy_vcpus(kvm); - kvm_unshare_hyp(kvm, kvm + 1); + remove_hyp_mappings(kvm, kvm + 1); kvm_arm_teardown_hypercalls(kvm); } @@ -493,7 +493,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) return err; - return kvm_share_hyp(vcpu, vcpu + 1); + return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index ea5484ce1f3b..49acdda3f1d0 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -33,7 +33,7 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) return 0; /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); + ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP); if (ret) return ret; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 4e6cf4a1a6eb..53e584a5e8d7 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -407,44 +407,20 @@ void __init free_hyp_pgds(void) { mutex_lock(&kvm_hyp_pgd_mutex); if (hyp_pgtable) { - kvm_pgtable_hyp_destroy(hyp_pgtable); + KVM_PGT_CALL(kvm_pgtable_hyp_destroy, hyp_pgtable); kfree(hyp_pgtable); hyp_pgtable = NULL; } mutex_unlock(&kvm_hyp_pgd_mutex); } -static bool kvm_host_owns_hyp_mappings(void) -{ - if (is_kernel_in_hyp_mode()) - return false; - - if (static_branch_likely(&kvm_protected_mode_initialized)) - return false; - - /* - * This can happen at boot time when __create_hyp_mappings() is called - * after the hyp protection has been enabled, but the static key has - * not been flipped yet. 
- */ - if (!hyp_pgtable && is_protected_kvm_enabled()) - return false; - - WARN_ON(!hyp_pgtable); - - return true; -} - int __create_hyp_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { int err; - if (WARN_ON(!kvm_host_owns_hyp_mappings())) - return -EINVAL; - mutex_lock(&kvm_hyp_pgd_mutex); - err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot); + err = KVM_PGT_CALL(kvm_pgtable_hyp_map, hyp_pgtable, start, size, phys, prot); mutex_unlock(&kvm_hyp_pgd_mutex); return err; @@ -461,138 +437,18 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) } } -struct hyp_shared_pfn { - u64 pfn; - int count; - struct rb_node node; -}; - -static DEFINE_MUTEX(hyp_shared_pfns_lock); -static struct rb_root hyp_shared_pfns = RB_ROOT; - -static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node, - struct rb_node **parent) -{ - struct hyp_shared_pfn *this; - - *node = &hyp_shared_pfns.rb_node; - *parent = NULL; - while (**node) { - this = container_of(**node, struct hyp_shared_pfn, node); - *parent = **node; - if (this->pfn < pfn) - *node = &((**node)->rb_left); - else if (this->pfn > pfn) - *node = &((**node)->rb_right); - else - return this; - } - - return NULL; -} - -static int share_pfn_hyp(u64 pfn) -{ - struct rb_node **node, *parent; - struct hyp_shared_pfn *this; - int ret = 0; - - mutex_lock(&hyp_shared_pfns_lock); - this = find_shared_pfn(pfn, &node, &parent); - if (this) { - this->count++; - goto unlock; - } - - this = kzalloc(sizeof(*this), GFP_KERNEL); - if (!this) { - ret = -ENOMEM; - goto unlock; - } - - this->pfn = pfn; - this->count = 1; - rb_link_node(&this->node, parent, node); - rb_insert_color(&this->node, &hyp_shared_pfns); - ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1); -unlock: - mutex_unlock(&hyp_shared_pfns_lock); - - return ret; -} - -static int unshare_pfn_hyp(u64 pfn) -{ - struct rb_node **node, *parent; - struct hyp_shared_pfn *this; - int ret = 0; - - 
mutex_lock(&hyp_shared_pfns_lock); - this = find_shared_pfn(pfn, &node, &parent); - if (WARN_ON(!this)) { - ret = -ENOENT; - goto unlock; - } - - this->count--; - if (this->count) - goto unlock; - - rb_erase(&this->node, &hyp_shared_pfns); - kfree(this); - ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1); -unlock: - mutex_unlock(&hyp_shared_pfns_lock); - - return ret; -} - -int kvm_share_hyp(void *from, void *to) -{ - phys_addr_t start, end, cur; - u64 pfn; - int ret; - - if (is_kernel_in_hyp_mode()) - return 0; - - /* - * The share hcall maps things in the 'fixed-offset' region of the hyp - * VA space, so we can only share physically contiguous data-structures - * for now. - */ - if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to)) - return -EINVAL; - - if (kvm_host_owns_hyp_mappings()) - return create_hyp_mappings(from, to, PAGE_HYP); - - start = ALIGN_DOWN(__pa(from), PAGE_SIZE); - end = PAGE_ALIGN(__pa(to)); - for (cur = start; cur < end; cur += PAGE_SIZE) { - pfn = __phys_to_pfn(cur); - ret = share_pfn_hyp(pfn); - if (ret) - return ret; - } - - return 0; -} - -void kvm_unshare_hyp(void *from, void *to) +void remove_hyp_mappings(void *from, void *to) { - phys_addr_t start, end, cur; - u64 pfn; + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); + unsigned long size = end - start; - if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from) + if (!is_protected_kvm_enabled() || !from) return; - start = ALIGN_DOWN(__pa(from), PAGE_SIZE); - end = PAGE_ALIGN(__pa(to)); - for (cur = start; cur < end; cur += PAGE_SIZE) { - pfn = __phys_to_pfn(cur); - WARN_ON(unshare_pfn_hyp(pfn)); - } + mutex_lock(&kvm_hyp_pgd_mutex); + WARN_ON(KVM_PGT_CALL(kvm_pgtable_hyp_unmap, hyp_pgtable, start, size) != size); + mutex_unlock(&kvm_hyp_pgd_mutex); } /** @@ -615,9 +471,6 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) if (is_kernel_in_hyp_mode()) return 0; - 
if (!kvm_host_owns_hyp_mappings()) - return -EPERM; - start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -699,16 +552,6 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, unsigned long addr; int ret = 0; - if (!kvm_host_owns_hyp_mappings()) { - addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, - phys_addr, size, prot); - if (IS_ERR_VALUE(addr)) - return addr; - *haddr = addr; - - return 0; - } - size = PAGE_ALIGN(size + offset_in_page(phys_addr)); ret = hyp_alloc_private_va_range(size, &addr); if (ret) @@ -2094,7 +1937,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) goto out; } - err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops); + err = KVM_PGT_CALL(kvm_pgtable_hyp_init, hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops); if (err) goto out_free_pgtable; @@ -2106,7 +1949,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) return 0; out_destroy_pgtable: - kvm_pgtable_hyp_destroy(hyp_pgtable); + KVM_PGT_CALL(kvm_pgtable_hyp_destroy, hyp_pgtable); out_free_pgtable: kfree(hyp_pgtable); hyp_pgtable = NULL; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 64de20e8001d..f5a02b4039b1 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -270,6 +270,124 @@ static int __init finalize_pkvm(void) } device_initcall_sync(finalize_pkvm); +struct hyp_shared_page { + struct rb_node node; + phys_addr_t phys; + void *hyp_va; + int count; +}; +static struct rb_root hyp_shared_pages = RB_ROOT; + +static struct hyp_shared_page *find_shared_page(void *hyp_va, struct rb_node ***node, + struct rb_node **parent) +{ + struct hyp_shared_page *page; + + *node = &hyp_shared_pages.rb_node; + *parent = NULL; + while (**node) { + page = container_of(**node, struct hyp_shared_page, node); + *parent = **node; + if (page->hyp_va < hyp_va) + *node = &((**node)->rb_left); + else if (page->hyp_va > hyp_va) + *node = &((**node)->rb_right); + else + return page; + } + + return NULL; +} + +int pkvm_pgtable_hyp_init(struct kvm_pgtable 
*pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops) +{ + if (pgt) + return kvm_pgtable_hyp_init(pgt, va_bits, mm_ops); + return 0; +} + +void pkvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) +{ + if (pgt) + return kvm_pgtable_hyp_destroy(pgt); +} + +static int share_page_hyp(void *hyp_va, phys_addr_t phys) +{ + struct rb_node **node, *parent; + struct hyp_shared_page *page; + + page = find_shared_page(hyp_va, &node, &parent); + if (page) { + page->count++; + return 0; + } + + page = kzalloc(sizeof(*page), GFP_KERNEL); + if (!page) + return -ENOMEM; + page->hyp_va = hyp_va; + page->phys = phys; + page->count = 1; + rb_link_node(&page->node, parent, node); + rb_insert_color(&page->node, &hyp_shared_pages); + + return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, phys >> PAGE_SHIFT, 1); +} + +int pkvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot) +{ + u64 off; + int ret; + + if (pgt) + return kvm_pgtable_hyp_map(pgt, addr, size, phys, prot); + + addr = ALIGN_DOWN(addr, PAGE_SIZE); + phys = ALIGN_DOWN(phys, PAGE_SIZE); + size = PAGE_ALIGN(size); + if (addr != (u64)kern_hyp_va(__va(phys))) + return -EINVAL; + if (prot != PAGE_HYP) + return -EPERM; + + for (off = 0; off < size; off += PAGE_SIZE) { + ret = share_page_hyp((void *)(addr + off), phys + off); + if (ret) + return ret; + } + + return 0; +} + +u64 pkvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + struct rb_node **node, *parent, *next; + struct hyp_shared_page *page; + u64 pfn, off = 0; + + if (pgt) + return kvm_pgtable_hyp_unmap(pgt, addr, size); + + page = find_shared_page((void *)addr, &node, &parent); + while (page && ((u64)page->hyp_va == addr + off) && off < size) { + next = rb_next(&page->node); + page->count--; + if (!page->count) { + pfn = page->phys >> PAGE_SHIFT; + rb_erase(&page->node, &hyp_shared_pages); + kfree(page); + if (kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1)) + break; + } + off += PAGE_SIZE; + page = next ? 
container_of(next, struct hyp_shared_page, node) : NULL; + } + + return off; +} + static int cmp_mappings(struct rb_node *node, const struct rb_node *parent) { struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 470524b31951..e8b3d08e26dd 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -115,7 +115,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) if (!buf) return -ENOMEM; - ret = kvm_share_hyp(buf, buf + reg_sz); + ret = create_hyp_mappings(buf, buf + reg_sz, PAGE_HYP); if (ret) { kfree(buf); return ret; @@ -154,9 +154,9 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { void *sve_state = vcpu->arch.sve_state; - kvm_unshare_hyp(vcpu, vcpu + 1); + remove_hyp_mappings(vcpu, vcpu + 1); if (sve_state) - kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); + remove_hyp_mappings(sve_state, sve_state + vcpu_sve_state_size(vcpu)); kfree(sve_state); kfree(vcpu->arch.ccsidr); }
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 66d93e320ec8..d116ab4230e8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) return &kvm->arch.mmu != mmu; } +static inline void kvm_fault_lock(struct kvm *kvm) +{ + if (is_protected_kvm_enabled()) + write_lock(&kvm->mmu_lock); + else + read_lock(&kvm->mmu_lock); +} + +static inline void kvm_fault_unlock(struct kvm *kvm) +{ + if (is_protected_kvm_enabled()) + write_unlock(&kvm->mmu_lock); + else + read_unlock(&kvm->mmu_lock); +} + #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); #else diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 55cc62b2f469..9bcbc7b8ed38 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -502,7 +502,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); + if (!is_protected_kvm_enabled()) + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); + else + free_hyp_memcache(&vcpu->arch.pkvm_memcache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); kvm_vgic_vcpu_destroy(vcpu); @@ -574,6 +577,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) struct kvm_s2_mmu *mmu; int *last_ran; + if (is_protected_kvm_enabled()) + goto nommu; + if (vcpu_has_nv(vcpu)) kvm_vcpu_load_hw_mmu(vcpu); @@ -594,6 +600,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) *last_ran = vcpu->vcpu_idx; } +nommu: vcpu->cpu = cpu; kvm_vgic_load(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 130f5f23bcb5..258d572eed62 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -103,8 +103,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) /* Limit guest vector 
length to the maximum supported by the host. */ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); - hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; - hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 641e4fec1659..7c2995cb4577 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -15,6 +15,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> #include <asm/kvm_pgtable.h> +#include <asm/kvm_pkvm.h> #include <asm/kvm_ras.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> @@ -31,6 +32,14 @@ static phys_addr_t __ro_after_init hyp_idmap_vector; static unsigned long __ro_after_init io_map_base; +#define KVM_PGT_S2(fn, ...) \ + ({ \ + typeof(kvm_pgtable_stage2_ ## fn) *__fn = kvm_pgtable_stage2_ ## fn; \ + if (is_protected_kvm_enabled()) \ + __fn = pkvm_pgtable_ ## fn; \ + __fn(__VA_ARGS__); \ + }) + static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end, phys_addr_t size) { @@ -147,7 +156,7 @@ static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr, return -EINVAL; next = __stage2_range_addr_end(addr, end, chunk_size); - ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache); + ret = KVM_PGT_S2(split, pgt, addr, next - addr, cache); if (ret) break; } while (addr = next, addr != end); @@ -168,15 +177,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { - kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); + if (is_protected_kvm_enabled()) + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); + else + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); return 0; } int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { - kvm_tlb_flush_vmid_range(&kvm->arch.mmu, - gfn << 
PAGE_SHIFT, nr_pages << PAGE_SHIFT); + u64 size = nr_pages << PAGE_SHIFT; + u64 addr = gfn << PAGE_SHIFT; + + if (is_protected_kvm_enabled()) + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); + else + kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size); return 0; } @@ -225,7 +242,7 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head) void *pgtable = page_to_virt(page); s8 level = page_private(page); - kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level); + KVM_PGT_S2(free_unlinked, &kvm_s2_mm_ops, pgtable, level); } static void stage2_free_unlinked_table(void *addr, s8 level) @@ -280,6 +297,11 @@ static void invalidate_icache_guest_page(void *va, size_t size) __invalidate_icache_guest_page(va, size); } +static int kvm_s2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + return KVM_PGT_S2(unmap, pgt, addr, size); +} + /* * Unmapping vs dcache management: * @@ -324,8 +346,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 lockdep_assert_held_write(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, - may_block)); + WARN_ON(stage2_apply_range(mmu, start, end, kvm_s2_unmap, may_block)); } void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, @@ -334,9 +355,14 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, __unmap_stage2_range(mmu, start, size, may_block); } +static int kvm_s2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + return KVM_PGT_S2(flush, pgt, addr, size); +} + void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush); + stage2_apply_range_resched(mmu, addr, end, kvm_s2_flush); } static void stage2_flush_memslot(struct kvm *kvm, @@ -942,10 +968,14 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return -ENOMEM; mmu->arch = 
&kvm->arch; - err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops); + err = KVM_PGT_S2(init, pgt, mmu, &kvm_s2_mm_ops); if (err) goto out_free_pgtable; + mmu->pgt = pgt; + if (is_protected_kvm_enabled()) + return 0; + mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); if (!mmu->last_vcpu_ran) { err = -ENOMEM; @@ -959,7 +989,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; mmu->split_page_cache.gfp_zero = __GFP_ZERO; - mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); if (kvm_is_nested_s2_mmu(kvm, mmu)) @@ -968,7 +997,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - kvm_pgtable_stage2_destroy(pgt); + KVM_PGT_S2(destroy, pgt); out_free_pgtable: kfree(pgt); return err; @@ -1065,7 +1094,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { - kvm_pgtable_stage2_destroy(pgt); + KVM_PGT_S2(destroy, pgt); kfree(pgt); } } @@ -1082,9 +1111,11 @@ static void *hyp_mc_alloc_fn(void *unused) void free_hyp_memcache(struct kvm_hyp_memcache *mc) { - if (is_protected_kvm_enabled()) - __free_hyp_memcache(mc, hyp_mc_free_fn, - kvm_host_va, NULL); + if (!is_protected_kvm_enabled()) + return; + + kfree(mc->mapping); + __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL); } int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) @@ -1092,6 +1123,12 @@ int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) if (!is_protected_kvm_enabled()) return 0; + if (!mc->mapping) { + mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT); + if (!mc->mapping) + return -ENOMEM; + } + return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, kvm_host_pa, NULL); } @@ -1130,8 +1167,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, break; write_lock(&kvm->mmu_lock); - ret = 
kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, - &cache, 0); + ret = KVM_PGT_S2(map, pgt, addr, PAGE_SIZE, pa, prot, &cache, 0); write_unlock(&kvm->mmu_lock); if (ret) break; @@ -1143,6 +1179,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, return ret; } +static int kvm_s2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + return KVM_PGT_S2(wrprotect, pgt, addr, size); +} /** * kvm_stage2_wp_range() - write protect stage2 memory region range * @mmu: The KVM stage-2 MMU pointer @@ -1151,7 +1191,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, */ void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect); + stage2_apply_range_resched(mmu, addr, end, kvm_s2_wrprotect); } /** @@ -1442,9 +1482,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long mmu_seq; phys_addr_t ipa = fault_ipa; struct kvm *kvm = vcpu->kvm; - struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; short vma_shift; + void *memcache; gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); @@ -1472,8 +1512,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * and a write fault needs to collapse a block entry into a table. 
*/ if (!fault_is_perm || (logging_active && write_fault)) { - ret = kvm_mmu_topup_memory_cache(memcache, - kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); + + if (!is_protected_kvm_enabled()) { + memcache = &vcpu->arch.mmu_page_cache; + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); + } else { + memcache = &vcpu->arch.pkvm_memcache; + ret = topup_hyp_memcache(memcache, min_pages); + } if (ret) return ret; } @@ -1494,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * logging_active is guaranteed to never be true for VM_PFNMAP * memslots. */ - if (logging_active) { + if (logging_active || is_protected_kvm_enabled()) { force_pte = true; vma_shift = PAGE_SHIFT; } else { @@ -1634,7 +1681,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, prot |= kvm_encode_nested_level(nested); } - read_lock(&kvm->mmu_lock); + kvm_fault_lock(kvm); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) { ret = -EAGAIN; @@ -1696,16 +1743,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * PTE, which will be preserved. 
*/ prot &= ~KVM_NV_GUEST_MAP_SZ; - ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot, flags); + ret = KVM_PGT_S2(relax_perms, pgt, fault_ipa, prot, flags); } else { - ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, + ret = KVM_PGT_S2(map, pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, memcache, flags); } out_unlock: kvm_release_faultin_page(kvm, page, !!ret, writable); - read_unlock(&kvm->mmu_lock); + kvm_fault_unlock(kvm); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) @@ -1724,7 +1771,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) read_lock(&vcpu->kvm->mmu_lock); mmu = vcpu->arch.hw_mmu; - kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa, flags); + KVM_PGT_S2(mkyoung, mmu->pgt, fault_ipa, flags); read_unlock(&vcpu->kvm->mmu_lock); } @@ -1764,7 +1811,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } /* Falls between the IPA range and the PARange? */ - if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { + if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); if (is_iabt) @@ -1930,7 +1977,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.mmu.pgt) return false; - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, + return KVM_PGT_S2(test_clear_young, kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, true); /* @@ -1946,7 +1993,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.mmu.pgt) return false; - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, + return KVM_PGT_S2(test_clear_young, kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, false); }
Introduce the KVM_PGT_S2() helper macro to allow switching from the traditional pgtable code to the pKVM version easily in mmu.c. The cost of this 'indirection' is expected to be minimal due to is_protected_kvm_enabled() being backed by a static key.

With this, everything is in place to allow the delegation of non-protected guest stage-2 page-tables to pKVM, so let's stop using the host's kvm_s2_mmu from EL2 and enjoy the ride.

Signed-off-by: Quentin Perret <qperret@google.com>
---
 arch/arm64/include/asm/kvm_mmu.h   |  16 +++++
 arch/arm64/kvm/arm.c               |   9 ++-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c |   2 -
 arch/arm64/kvm/mmu.c               | 107 +++++++++++++++++++++--------
 4 files changed, 101 insertions(+), 33 deletions(-)