Message ID | 20230409063000.3559991-14-ricarkol@google.com
---|---
State | New, archived
Series | Implement Eager Page Splitting for ARM
On 4/9/23 2:30 PM, Ricardo Koller wrote:
> From: Marc Zyngier <maz@kernel.org>
>
> Broadcast TLB invalidations (TLBIs) targeting the Inner Shareable
> Domain are usually less performant than their non-shareable variant.
> In particular, we observed some implementations that take
> milliseconds to complete parallel broadcasted TLBIs.
>
> It's safe to use non-shareable TLBIs when relaxing permissions on a
> PTE in the KVM case. According to the ARM ARM (0487I.a) section
> D8.13.1 "Using break-before-make when updating translation table
> entries", permission relaxation does not need break-before-make.
> Specifically, R_WHZWS states that these are the only changes that
> require a break-before-make sequence: changes of memory type
> (Shareability or Cacheability), address changes, or changing the block
> size.
>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Signed-off-by: Ricardo Koller <ricarkol@google.com>
> ---
>  arch/arm64/include/asm/kvm_asm.h   |  4 +++
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 10 ++++++
>  arch/arm64/kvm/hyp/nvhe/tlb.c      | 54 ++++++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp/pgtable.c       |  2 +-
>  arch/arm64/kvm/hyp/vhe/tlb.c       | 32 ++++++++++++++++++
>  5 files changed, 101 insertions(+), 1 deletion(-)
>

Reviewed-by: Gavin Shan <gshan@redhat.com>
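The argument in the commit message above rests on a single ARM ARM rule: per R_WHZWS, only changes of memory type (Shareability or Cacheability), of the output address, or of the block size require a break-before-make sequence, so relaxing permissions can update the PTE in place and follow it with a cheap, non-shareable TLBI. As a reading aid only, here is a minimal, self-contained C sketch of that decision; the enum and helper names are invented for illustration and are not part of this patch or of KVM.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical classification of stage-2 PTE updates, for illustration only. */
enum s2_pte_change {
	S2_CHANGE_RELAX_PERMS,		/* e.g. write or exec permission added	*/
	S2_CHANGE_MEMORY_TYPE,		/* Shareability or Cacheability change	*/
	S2_CHANGE_OUTPUT_ADDRESS,	/* PTE points at a different address	*/
	S2_CHANGE_BLOCK_SIZE,		/* block <-> page remapping		*/
};

/*
 * Per the ARM ARM (0487I.a) D8.13.1, R_WHZWS, only the last three kinds of
 * change require break-before-make. Permission relaxation does not, which
 * is why the patch can invalidate with a non-shareable TLBI after the
 * in-place update.
 */
static bool needs_break_before_make(enum s2_pte_change change)
{
	return change != S2_CHANGE_RELAX_PERMS;
}

int main(void)
{
	printf("relax perms needs BBM: %d\n",
	       needs_break_before_make(S2_CHANGE_RELAX_PERMS));	/* 0 */
	printf("address change needs BBM: %d\n",
	       needs_break_before_make(S2_CHANGE_OUTPUT_ADDRESS));	/* 1 */
	return 0;
}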
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 43c3bc0f9544d..bb17b2ead4c71 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
 	__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
				     int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+					 phys_addr_t ipa,
+					 int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 728e01d4536b0..c6bf1e49ca934 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -125,6 +125,15 @@ static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
 	__kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
 }
 
+static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+	DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
+	DECLARE_REG(int, level, host_ctxt, 3);
+
+	__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -315,6 +324,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_vcpu_run),
 	HANDLE_FUNC(__kvm_flush_vm_context),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
+	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid),
 	HANDLE_FUNC(__kvm_flush_cpu_context),
 	HANDLE_FUNC(__kvm_timer_set_cntvoff),
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index d296d617f5896..ef2b70587f933 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -109,6 +109,60 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
 	__tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+				  phys_addr_t ipa, int level)
+{
+	struct tlb_inv_context cxt;
+
+	dsb(nshst);
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt);
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	ipa >>= 12;
+	__tlbi_level(ipas2e1, ipa, level);
+
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb(nsh);
+	__tlbi(vmalle1);
+	dsb(nsh);
+	isb();
+
+	/*
+	 * If the host is running at EL1 and we have a VPIPT I-cache,
+	 * then we must perform I-cache maintenance at EL2 in order for
+	 * it to have an effect on the guest. Since the guest cannot hit
+	 * I-cache lines allocated with a different VMID, we don't need
+	 * to worry about junk out of guest reset (we nuke the I-cache on
+	 * VMID rollover), but we do need to be careful when remapping
+	 * executable pages for the same guest. This can happen when KSM
+	 * takes a CoW fault on an executable page, copies the page into
+	 * a page that was previously mapped in the guest and then needs
+	 * to invalidate the guest view of the I-cache for that page
+	 * from EL1. To solve this, we invalidate the entire I-cache when
+	 * unmapping a page from a guest if we have a VPIPT I-cache but
+	 * the host is running at EL1. As above, we could do better if
+	 * we had the VA.
+	 *
+	 * The moral of this story is: if you have a VPIPT I-cache, then
+	 * you should be running with VHE enabled.
+	 */
+	if (icache_is_vpipt())
+		icache_inval_all_pou();
+
+	__tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 48c5a95c6e8cd..023269dd84f76 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1189,7 +1189,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				       KVM_PGTABLE_WALK_HANDLE_FAULT |
				       KVM_PGTABLE_WALK_SHARED);
 	if (!ret)
-		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
+		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
 	return ret;
 }
 
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 24cef9b87f9e9..e69da550cdc5b 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -111,6 +111,38 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
 	__tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+				  phys_addr_t ipa, int level)
+{
+	struct tlb_inv_context cxt;
+
+	dsb(nshst);
+
+	/* Switch to requested VMID */
+	__tlb_switch_to_guest(mmu, &cxt);
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	ipa >>= 12;
+	__tlbi_level(ipas2e1, ipa, level);
+
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb(nsh);
+	__tlbi(vmalle1);
+	dsb(nsh);
+	isb();
+
+	__tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
 	struct tlb_inv_context cxt;
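One detail in both new helpers that may look odd at first is the ipa >>= 12 before __tlbi_level(ipas2e1, ipa, level): the TLBI-by-IPA operations take the address with its low 12 bits dropped rather than the raw byte address. The standalone snippet below only models that packing step; the helper name is made up for illustration, and the exact operand encoding is an architectural detail that this patch does not restate, so treat it as an assumption.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only: mirrors the "ipa >>= 12" done before __tlbi_level()
 * in the patch. The TLBI operand carries the IPA with the low 12 bits
 * dropped, so a byte address must be shifted before being passed on.
 */
static uint64_t tlbi_ipa_operand(uint64_t ipa)
{
	return ipa >> 12;
}

int main(void)
{
	uint64_t ipa = 0x40201000ULL;	/* hypothetical guest IPA */

	printf("IPA 0x%llx -> TLBI operand 0x%llx\n",
	       (unsigned long long)ipa,
	       (unsigned long long)tlbi_ipa_operand(ipa));
	return 0;
}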