@@ -233,6 +233,8 @@ extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_vae2is(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
+extern void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
@@ -64,6 +64,13 @@ extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
+
+union tlbi_info;
+
+extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
+ const union tlbi_info *info,
+ void (*)(struct kvm_s2_mmu *,
+ const union tlbi_info *));
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
@@ -654,6 +654,10 @@
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
/* TLBI instructions */
+#define TLBI_Op0 1
+#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */
+
#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
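
The TLBI_Op1_* constants above are not exercised in the hunks shown here. As a purely hypothetical illustration (an assumption, not code from this series), they could be combined with the existing sys_reg_Op0()/sys_reg_Op1() accessors to classify a trapped TLBI encoding by the exception level it targets:

	/*
	 * Hypothetical helper (not part of this diff): distinguish EL2-regime
	 * TLBIs from EL1-regime ones by the Op1 field of the trapped encoding.
	 */
	static inline bool tlbi_targets_el2(u32 sys_encoding)
	{
		return sys_reg_Op0(sys_encoding) == TLBI_Op0 &&
		       sys_reg_Op1(sys_encoding) == TLBI_Op1_EL2;
	}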
@@ -231,3 +231,84 @@ void __kvm_flush_vm_context(void)
dsb(ish);
}
+
+void __kvm_tlb_vae2is(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * Execute the EL1 version of the TLBI VAE2* instruction, forcing
+ * an upgrade to the Inner Shareable domain in order to
+ * perform the invalidation on all CPUs.
+ */
+ switch (sys_encoding) {
+ case OP_TLBI_VAE2:
+ case OP_TLBI_VAE2IS:
+ __tlbi(vae1is, va);
+ break;
+ case OP_TLBI_VALE2:
+ case OP_TLBI_VALE2IS:
+ __tlbi(vale1is, va);
+ break;
+ default:
+ break;
+ }
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * Execute the same instruction as the guest hypervisor did,
+ * expanding the scope of local TLB invalidations to the Inner
+ * Shareable domain so that it takes place on all CPUs. This
+ * is equivalent to having HCR_EL2.FB set.
+ */
+ switch (sys_encoding) {
+ case OP_TLBI_VMALLE1:
+ case OP_TLBI_VMALLE1IS:
+ __tlbi(vmalle1is);
+ break;
+ case OP_TLBI_VAE1:
+ case OP_TLBI_VAE1IS:
+ __tlbi(vae1is, val);
+ break;
+ case OP_TLBI_ASIDE1:
+ case OP_TLBI_ASIDE1IS:
+ __tlbi(aside1is, val);
+ break;
+ case OP_TLBI_VAAE1:
+ case OP_TLBI_VAAE1IS:
+ __tlbi(vaae1is, val);
+ break;
+ case OP_TLBI_VALE1:
+ case OP_TLBI_VALE1IS:
+ __tlbi(vale1is, val);
+ break;
+ case OP_TLBI_VAALE1:
+ case OP_TLBI_VAALE1IS:
+ __tlbi(vaale1is, val);
+ break;
+ default:
+ break;
+ }
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
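
To make the payload expected by __kvm_tlb_el1_instr() concrete, here is a minimal, hypothetical call-site sketch (not part of this patch). It assumes the architectural TLBI VAE1 operand layout, i.e. VA[55:12] in Xt[43:0] and the ASID in Xt[63:48]:

	/* Hypothetical call site: forward a TLBI VAE1 for ASID 5 on a given MMU. */
	u64 asid = 5, va = 0x0000aaaa12345000ULL;
	u64 val = (asid << 48) | ((va >> 12) & GENMASK_ULL(43, 0));

	kvm_call_hyp(__kvm_tlb_el1_instr, mmu, val, OP_TLBI_VAE1);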
@@ -173,10 +173,25 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
}
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
- gfn_t gfn, u64 nr_pages)
+ gfn_t gfn, u64 nr_pages)
{
- kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
- gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ if (!kvm->arch.nested_mmus) {
+ /*
+ * For a normal (i.e. non-nested) guest, flush entries for the
+ * given VMID.
+ */
+ kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+ gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ } else {
+ /*
+ * When supporting nested virtualization, we can have multiple
+ * VMIDs in play for each VCPU in the VM, so it's really not
+ * worth trying to quiesce the system and flush all the VMIDs
+ * that may be in use; instead, just nuke the whole thing.
+ */
+ kvm_call_hyp(__kvm_flush_vm_context);
+ }
+
return 0;
}
@@ -351,6 +351,41 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
return ret;
}
+/*
+ * We can have multiple *different* MMU contexts with the same VMID:
+ *
+ * - S2 being enabled or not, hence differing by the HCR_EL2.VM bit
+ *
+ * - Multiple vcpus using private S2s (huh huh...), hence differing by the
+ * VTTBR_EL2.BADDR address
+ *
+ * - A combination of the above...
+ *
+ * We can always identify which MMU context to pick at run-time. However,
+ * TLB invalidation involving a VMID must take action on all the TLBs using
+ * this particular VMID. This translates into applying the same invalidation
+ * operation to all the contexts that are using this VMID. Moar phun!
+ */
+void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
+ const union tlbi_info *info,
+ void (*tlbi_callback)(struct kvm_s2_mmu *,
+ const union tlbi_info *))
+{
+ write_lock(&kvm->mmu_lock);
+
+ for (int i = 0; i < kvm->arch.nested_mmus_size; i++) {
+ struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
+
+ if (!kvm_s2_mmu_valid(mmu))
+ continue;
+
+ if (vmid == get_vmid(mmu->tlb_vttbr))
+ tlbi_callback(mmu, info);
+ }
+
+ write_unlock(&kvm->mmu_lock);
+}
+
/* Must be called with kvm->mmu_lock held */
struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
{
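
kvm_s2_mmu_iterate_by_vmid() matches shadow MMU contexts by the VMID programmed into their tlb_vttbr. The get_vmid() helper it relies on is not part of this excerpt; a plausible sketch, assuming the VMID occupies VTTBR_EL2[63:48] (8 or 16 bits wide depending on VTCR_EL2.VS):

	/* Plausible sketch of get_vmid() (assumed, not shown in this diff). */
	static inline u16 get_vmid(u64 vttbr)
	{
		return (vttbr & GENMASK_ULL(63, 48)) >> 48;
	}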
@@ -2906,6 +2906,216 @@ static bool handle_s12w(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return handle_s12(vcpu, p, r, true);
}
+static bool handle_alle2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * To emulate invalidating all EL2 regime stage 1 TLB entries for all
+ * PEs, executing TLBI VMALLE1IS is enough. But reuse the existing
+ * interface for simplicity; invalidating stage 2 entries doesn't
+ * affect correctness.
+ */
+ kvm_call_hyp(__kvm_tlb_flush_vmid, &vcpu->kvm->arch.mmu);
+ return true;
+}
+
+static bool handle_vae2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ /*
+ * Based on the same principle as TLBI ALLE2 instruction
+ * emulation, we emulate TLBI VAE2* instructions by executing
+ * corresponding TLBI VAE1* instructions with the virtual
+ * EL2's VMID assigned by the host hypervisor.
+ */
+ kvm_call_hyp(__kvm_tlb_vae2is, &vcpu->kvm->arch.mmu, p->regval, sys_encoding);
+ return true;
+}
+
+static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+
+ write_lock(&vcpu->kvm->mmu_lock);
+
+ /*
+ * Clear all mappings in the shadow page tables and invalidate the stage
+ * 1 and 2 TLB entries via kvm_tlb_flush_vmid_ipa().
+ */
+ kvm_nested_s2_unmap(vcpu->kvm);
+
+ if (atomic64_read(&mmu->vmid.id)) {
+ /*
+ * Invalidate the stage 1 and 2 TLB entries for the host OS
+ * in a VM only if there is one.
+ */
+ kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ }
+
+ write_unlock(&vcpu->kvm->mmu_lock);
+
+ return true;
+}
+
+/* Only defined here as this is an internal "abstraction" */
+union tlbi_info {
+ struct {
+ u64 start;
+ u64 size;
+ } range;
+
+ struct {
+ u64 addr;
+ } ipa;
+
+ struct {
+ u64 addr;
+ u32 encoding;
+ } va;
+};
+
+static void s2_mmu_unmap_stage2_range(struct kvm_s2_mmu *mmu,
+ const union tlbi_info *info)
+{
+ kvm_unmap_stage2_range(mmu, info->range.start, info->range.size);
+}
+
+static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 limit, vttbr;
+
+ vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ limit = BIT_ULL(kvm_get_pa_bits(vcpu->kvm));
+
+ kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
+ &(union tlbi_info) {
+ .range = {
+ .start = 0,
+ .size = limit,
+ },
+ },
+ s2_mmu_unmap_stage2_range);
+
+ return true;
+}
+
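
As an illustrative calculation (not from the patch): for a guest configured with a 40-bit IPA space, limit = BIT_ULL(40) = 1 TiB, so the handler above unmaps the full [0, 1 TiB) range of every shadow stage 2 context that carries the VMID taken from the guest hypervisor's VTTBR_EL2.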
+static void s2_mmu_unmap_stage2_ipa(struct kvm_s2_mmu *mmu,
+ const union tlbi_info *info)
+{
+ unsigned long max_size;
+ u64 base_addr;
+
+ /*
+ * We drop a number of things from the supplied value:
+ *
+ * - NS bit: we're non-secure only.
+ *
+ * - TTL field: We already have the granule size from the
+ * VTCR_EL2.TG0 field, and the level is only relevant to the
+ * guest's S2PT.
+ *
+ * - IPA[51:48]: We don't support 52bit IPA just yet...
+ *
+ * And of course, adjust the IPA to be on an actual address.
+ */
+ base_addr = (info->ipa.addr & GENMASK_ULL(35, 0)) << 12;
+
+ /* Compute the maximum extent of the invalidation */
+ switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) {
+ case VTCR_EL2_TG0_4K:
+ max_size = SZ_1G;
+ break;
+ case VTCR_EL2_TG0_16K:
+ max_size = SZ_32M;
+ break;
+ case VTCR_EL2_TG0_64K:
+ /*
+ * No, we do not support 52bit IPA in nested yet. Once
+ * we do, this should be 4TB.
+ */
+ /* FIXME: remove the 52bit PA support from the IDregs */
+ max_size = SZ_512M;
+ break;
+ default:
+ BUG();
+ }
+
+ base_addr &= ~(max_size - 1);
+
+ kvm_unmap_stage2_range(mmu, base_addr, max_size);
+}
+
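
A worked decoding example may help here (illustrative values, assuming a 4K granule; not from the patch): a guest TLBI IPAS2E1IS for IPA 0x40567000 carries IPA[47:12] = 0x40567 in Xt[35:0]. The code above recovers base_addr = 0x40567 << 12 = 0x40567000, selects max_size = SZ_1G for VTCR_EL2.TG0 == 4K, rounds down to 0x40000000, and therefore unmaps [0x40000000, 0x80000000) of the shadow stage 2.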
+static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+
+ kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
+ &(union tlbi_info) {
+ .ipa = {
+ .addr = p->regval,
+ },
+ },
+ s2_mmu_unmap_stage2_ipa);
+
+ return true;
+}
+
+static void s2_mmu_unmap_stage2_va(struct kvm_s2_mmu *mmu,
+ const union tlbi_info *info)
+{
+ kvm_call_hyp(__kvm_tlb_el1_instr, mmu, info->va.addr, info->va.encoding);
+}
+
+static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+ u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+
+ /*
+ * If we're here, this is because we've trapped on an EL1 TLBI
+ * instruction that affects the EL1 translation regime while
+ * we're running in a context that doesn't allow us to let the
+ * HW do its thing (aka vEL2):
+ *
+ * - HCR_EL2.E2H == 0 : a non-VHE guest
+ * - HCR_EL2.{E2H,TGE} == { 1, 0 } : a VHE guest in guest mode
+ *
+ * We don't expect these helpers to ever be called when running
+ * in a vEL1 context.
+ */
+
+ WARN_ON(!vcpu_is_el2(vcpu));
+
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ mutex_lock(&vcpu->kvm->lock);
+ /*
+ * ARMv8.4-NV allows the guest to change TGE behind
+ * our back, so we always trap EL1 TLBIs from vEL2...
+ */
+ kvm_call_hyp(__kvm_tlb_el1_instr, &vcpu->kvm->arch.mmu, p->regval, sys_encoding);
+ mutex_unlock(&vcpu->kvm->lock);
+
+ return true;
+ }
+
+ kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
+ &(union tlbi_info) {
+ .va = {
+ .addr = p->regval,
+ .encoding = sys_encoding,
+ },
+ },
+ s2_mmu_unmap_stage2_va);
+
+ return true;
+}
+
/*
* AT instruction emulation
*
@@ -2997,12 +3207,40 @@ static struct sys_reg_desc sys_insn_descs[] = {
{ SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
+ SYS_INSN(TLBI_VMALLE1IS, handle_tlbi_el1),
+ SYS_INSN(TLBI_VAE1IS, handle_tlbi_el1),
+ SYS_INSN(TLBI_ASIDE1IS, handle_tlbi_el1),
+ SYS_INSN(TLBI_VAAE1IS, handle_tlbi_el1),
+ SYS_INSN(TLBI_VALE1IS, handle_tlbi_el1),
+ SYS_INSN(TLBI_VAALE1IS, handle_tlbi_el1),
+ SYS_INSN(TLBI_VMALLE1, handle_tlbi_el1),
+ SYS_INSN(TLBI_VAE1, handle_tlbi_el1),
+ SYS_INSN(TLBI_ASIDE1, handle_tlbi_el1),
+ SYS_INSN(TLBI_VAAE1, handle_tlbi_el1),
+ SYS_INSN(TLBI_VALE1, handle_tlbi_el1),
+ SYS_INSN(TLBI_VAALE1, handle_tlbi_el1),
+
SYS_INSN(AT_S1E2R, handle_s1e2),
SYS_INSN(AT_S1E2W, handle_s1e2),
SYS_INSN(AT_S12E1R, handle_s12r),
SYS_INSN(AT_S12E1W, handle_s12w),
SYS_INSN(AT_S12E0R, handle_s12r),
SYS_INSN(AT_S12E0W, handle_s12w),
+
+ SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is),
+ SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is),
+ SYS_INSN(TLBI_ALLE2IS, handle_alle2is),
+ SYS_INSN(TLBI_VAE2IS, handle_vae2is),
+ SYS_INSN(TLBI_ALLE1IS, handle_alle1is),
+ SYS_INSN(TLBI_VALE2IS, handle_vae2is),
+ SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is),
+ SYS_INSN(TLBI_IPAS2E1, handle_ipas2e1is),
+ SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is),
+ SYS_INSN(TLBI_ALLE2, handle_alle2is),
+ SYS_INSN(TLBI_VAE2, handle_vae2is),
+ SYS_INSN(TLBI_ALLE1, handle_alle1is),
+ SYS_INSN(TLBI_VALE2, handle_vae2is),
+ SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is),
};
static const struct sys_reg_desc *first_idreg;