Message ID | 20230602221447.1809849-4-surajjs@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] KVM: arm64: Update id_reg limit value based on per vcpu flags | expand |
On Fri, 02 Jun 2023 23:14:47 +0100, Suraj Jitindar Singh <surajjs@amazon.com> wrote: > > With per guest ID registers, MTE settings from userspace can be stored in > its corresponding ID register. > > No functional change intended. > > Signed-off-by: Suraj Jitindar Singh <surajjs@amazon.com> > --- > arch/arm64/include/asm/kvm_host.h | 21 ++++++++++----------- > arch/arm64/kvm/arm.c | 11 ++++++++++- > arch/arm64/kvm/sys_regs.c | 5 +++++ > 3 files changed, 25 insertions(+), 12 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 7b0f43373dbe..861997a14ba1 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -226,9 +226,7 @@ struct kvm_arch { > */ > #define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0 > /* Memory Tagging Extension enabled for the guest */ > -#define KVM_ARCH_FLAG_MTE_ENABLED 1 > - /* At least one vCPU has ran in the VM */ > -#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 > +#define KVM_ARCH_FLAG_HAS_RAN_ONCE 1 > /* > * The following two bits are used to indicate the guest's EL1 > * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT > @@ -236,22 +234,22 @@ struct kvm_arch { > * Otherwise, the guest's EL1 register width has not yet been > * determined yet. 
> */ > -#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 > -#define KVM_ARCH_FLAG_EL1_32BIT 4 > +#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 2 > +#define KVM_ARCH_FLAG_EL1_32BIT 3 > /* PSCI SYSTEM_SUSPEND enabled for the guest */ > -#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 > +#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4 > /* VM counter offset */ > -#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 > +#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5 > /* Timer PPIs made immutable */ > -#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 > +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 > /* SMCCC filter initialized for the VM */ > -#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8 > +#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7 > /* > * AA64DFR0_EL1.PMUver was set as ID_AA64DFR0_EL1_PMUVer_IMP_DEF > * or DFR0_EL1.PerfMon was set as ID_DFR0_EL1_PerfMon_IMPDEF from > * userspace for VCPUs without PMU. > */ > -#define KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU 9 > +#define KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU 8 > > unsigned long flags; > > @@ -1112,7 +1110,8 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); > > #define kvm_has_mte(kvm) \ > (system_supports_mte() && \ > - test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) > + FIELD_GET(ID_AA64PFR1_EL1_MTE_MASK, \ > + IDREG(kvm, SYS_ID_AA64PFR1_EL1))) > > #define kvm_supports_32bit_el0() \ > (system_supports_32bit_el0() && \ > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index ca18c09ccf82..6fc4190559d1 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -80,8 +80,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > if (!system_supports_mte() || kvm->created_vcpus) { > r = -EINVAL; > } else { > + u64 val; > + > + /* Protects the idregs against modification */ > + mutex_lock(&kvm->arch.config_lock); > + > + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); > + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_MASK, 1); The architecture specifies 3 versions of MTE in the published ARM ARM, with a 4th coming up as part of the 2022 
extensions. Why are you actively crippling the MTE version presented to the guest, and potentially introducing unexpected behaviours? > + IDREG(kvm, SYS_ID_AA64PFR1_EL1) = val; > + > + mutex_unlock(&kvm->arch.config_lock); > r = 0; > - set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); > } > mutex_unlock(&kvm->lock); > break; > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 59f8adda47fa..8cffb82dd10d 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -3672,6 +3672,11 @@ void kvm_arm_init_id_regs(struct kvm *kvm) > idreg++; > id = reg_to_encoding(idreg); > } > + > + /* MTE disabled by default even when supported */ > + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); > + IDREG(kvm, SYS_ID_AA64PFR1_EL1) = val; > } > > int __init kvm_sys_reg_table_init(void) Overall, I don't really see the point of such a change. It creates more problems than it solves. Thanks, M.
On Sat, Jun 03 2023, Marc Zyngier <maz@kernel.org> wrote: > On Fri, 02 Jun 2023 23:14:47 +0100, > Suraj Jitindar Singh <surajjs@amazon.com> wrote: >> >> With per guest ID registers, MTE settings from userspace can be stored in >> its corresponding ID register. >> >> No functional change intended. >> >> Signed-off-by: Suraj Jitindar Singh <surajjs@amazon.com> >> --- >> arch/arm64/include/asm/kvm_host.h | 21 ++++++++++----------- >> arch/arm64/kvm/arm.c | 11 ++++++++++- >> arch/arm64/kvm/sys_regs.c | 5 +++++ >> 3 files changed, 25 insertions(+), 12 deletions(-) >> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c >> index ca18c09ccf82..6fc4190559d1 100644 >> --- a/arch/arm64/kvm/arm.c >> +++ b/arch/arm64/kvm/arm.c >> @@ -80,8 +80,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, >> if (!system_supports_mte() || kvm->created_vcpus) { >> r = -EINVAL; >> } else { >> + u64 val; >> + >> + /* Protects the idregs against modification */ >> + mutex_lock(&kvm->arch.config_lock); >> + >> + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); >> + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_MASK, 1); > > The architecture specifies 3 versions of MTE in the published ARM ARM, > with a 4th coming up as part of the 2022 extensions. Is that the one that adds some more MTE<foo> bits in AA64PFR1 and AA64PFR2? > Why are you > actively crippling the MTE version presented to the guest, and > potentially introduce unexpected behaviours? While the code does not look correct here, I think we'll need some way to control which version of MTE is presented to the guest for compatibility handling; does it make sense to control this per-cpu, or does it need to be a vm-wide setting?
On Mon, 05 Jun 2023 17:39:50 +0100, Cornelia Huck <cohuck@redhat.com> wrote: > > On Sat, Jun 03 2023, Marc Zyngier <maz@kernel.org> wrote: > > > On Fri, 02 Jun 2023 23:14:47 +0100, > > Suraj Jitindar Singh <surajjs@amazon.com> wrote: > >> > >> With per guest ID registers, MTE settings from userspace can be stored in > >> its corresponding ID register. > >> > >> No functional change intended. > >> > >> Signed-off-by: Suraj Jitindar Singh <surajjs@amazon.com> > >> --- > >> arch/arm64/include/asm/kvm_host.h | 21 ++++++++++----------- > >> arch/arm64/kvm/arm.c | 11 ++++++++++- > >> arch/arm64/kvm/sys_regs.c | 5 +++++ > >> 3 files changed, 25 insertions(+), 12 deletions(-) > >> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > >> index ca18c09ccf82..6fc4190559d1 100644 > >> --- a/arch/arm64/kvm/arm.c > >> +++ b/arch/arm64/kvm/arm.c > >> @@ -80,8 +80,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > >> if (!system_supports_mte() || kvm->created_vcpus) { > >> r = -EINVAL; > >> } else { > >> + u64 val; > >> + > >> + /* Protects the idregs against modification */ > >> + mutex_lock(&kvm->arch.config_lock); > >> + > >> + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); > >> + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_MASK, 1); > > > > The architecture specifies 3 versions of MTE in the published ARM ARM, > > with a 4th coming up as part of the 2022 extensions. > > Is that the one that adds some more MTE<foo> bits in AA64PFR1 and > AA64PFR2? Yeah, that. You get ID_AA64PFR1_EL1.{MTE,MTE_frac,MTEX}, plus ID_AA64PFR2_EL1.{MTEFAR,MTESTOREONLY,MTEPERM}... It this sounds like a train wreck, then it probably is one! > > > Why are you > > actively crippling the MTE version presented to the guest, and > > potentially introduce unexpected behaviours? 
> > While the code does not look correct here, I think we'll need some way to > control which version of MTE is presented to the guest for compatibility > handling; does it make sense to control this per-cpu, or does it need to > be a vm-wide setting? It absolutely needs to be VM-wide. Only having half the vcpus supporting tags wouldn't make much sense. But the problem is that the various versions of MTE are not necessarily compatible, as MTE4 makes MTE3 optional (with a fallback to MTE2)... There are more subtleties around the what instructions are available in which mode, and whether the various subfeatures can be configured or not. Thanks, M.
On Tue, Jun 06 2023, Marc Zyngier <maz@kernel.org> wrote: > On Mon, 05 Jun 2023 17:39:50 +0100, > Cornelia Huck <cohuck@redhat.com> wrote: >> >> On Sat, Jun 03 2023, Marc Zyngier <maz@kernel.org> wrote: >> >> > On Fri, 02 Jun 2023 23:14:47 +0100, >> > Suraj Jitindar Singh <surajjs@amazon.com> wrote: >> >> >> >> With per guest ID registers, MTE settings from userspace can be stored in >> >> its corresponding ID register. >> >> >> >> No functional change intended. >> >> >> >> Signed-off-by: Suraj Jitindar Singh <surajjs@amazon.com> >> >> --- >> >> arch/arm64/include/asm/kvm_host.h | 21 ++++++++++----------- >> >> arch/arm64/kvm/arm.c | 11 ++++++++++- >> >> arch/arm64/kvm/sys_regs.c | 5 +++++ >> >> 3 files changed, 25 insertions(+), 12 deletions(-) >> >> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c >> >> index ca18c09ccf82..6fc4190559d1 100644 >> >> --- a/arch/arm64/kvm/arm.c >> >> +++ b/arch/arm64/kvm/arm.c >> >> @@ -80,8 +80,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, >> >> if (!system_supports_mte() || kvm->created_vcpus) { >> >> r = -EINVAL; >> >> } else { >> >> + u64 val; >> >> + >> >> + /* Protects the idregs against modification */ >> >> + mutex_lock(&kvm->arch.config_lock); >> >> + >> >> + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); >> >> + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_MASK, 1); >> > >> > The architecture specifies 3 versions of MTE in the published ARM ARM, >> > with a 4th coming up as part of the 2022 extensions. >> >> Is that the one that adds some more MTE<foo> bits in AA64PFR1 and >> AA64PFR2? > > Yeah, that. You get ID_AA64PFR1_EL1.{MTE,MTE_frac,MTEX}, plus > ID_AA64PFR2_EL1.{MTEFAR,MTESTOREONLY,MTEPERM}... It this sounds like a > train wreck, then it probably is one! With that many features, what could possibly go wrong! > >> >> > Why are you >> > actively crippling the MTE version presented to the guest, and >> > potentially introduce unexpected behaviours? 
>> >> While the code does not look correct here, I think we'll need some way to >> control which version of MTE is presented to the guest for compatibility >> handling; does it make sense to control this per-cpu, or does it need to >> be a vm-wide setting? > > It absolutely needs to be VM-wide. Only having half the vcpus > supporting tags wouldn't make much sense. > > But the problem is that the various versions of MTE are not > necessarily compatible, as MTE4 makes MTE3 optional (with a fallback > to MTE2)... There are more subtleties around the what instructions are > available in which mode, and whether the various subfeatures can be > configured or not. So I guess we'll have to expose all of that to userspace, so that it can actually configure the various configurations that will surely show up in the wild...
On Tue, Jun 06, 2023 at 05:42:24PM +0100, Marc Zyngier wrote: > On Mon, 05 Jun 2023 17:39:50 +0100, > Cornelia Huck <cohuck@redhat.com> wrote: > > On Sat, Jun 03 2023, Marc Zyngier <maz@kernel.org> wrote: > > > On Fri, 02 Jun 2023 23:14:47 +0100, > > > Suraj Jitindar Singh <surajjs@amazon.com> wrote: > > >> > > >> With per guest ID registers, MTE settings from userspace can be stored in > > >> its corresponding ID register. > > >> > > >> No functional change intended. > > >> > > >> Signed-off-by: Suraj Jitindar Singh <surajjs@amazon.com> > > >> --- > > >> arch/arm64/include/asm/kvm_host.h | 21 ++++++++++----------- > > >> arch/arm64/kvm/arm.c | 11 ++++++++++- > > >> arch/arm64/kvm/sys_regs.c | 5 +++++ > > >> 3 files changed, 25 insertions(+), 12 deletions(-) > > >> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > > >> index ca18c09ccf82..6fc4190559d1 100644 > > >> --- a/arch/arm64/kvm/arm.c > > >> +++ b/arch/arm64/kvm/arm.c > > >> @@ -80,8 +80,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > > >> if (!system_supports_mte() || kvm->created_vcpus) { > > >> r = -EINVAL; > > >> } else { > > >> + u64 val; > > >> + > > >> + /* Protects the idregs against modification */ > > >> + mutex_lock(&kvm->arch.config_lock); > > >> + > > >> + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); > > >> + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_MASK, 1); > > > > > > The architecture specifies 3 versions of MTE in the published ARM ARM, > > > with a 4th coming up as part of the 2022 extensions. > > > > Is that the one that adds some more MTE<foo> bits in AA64PFR1 and > > AA64PFR2? > > Yeah, that. You get ID_AA64PFR1_EL1.{MTE,MTE_frac,MTEX}, plus > ID_AA64PFR2_EL1.{MTEFAR,MTESTOREONLY,MTEPERM}... It this sounds like a > train wreck, then it probably is one! I stared about an hour at that documentation and I think I got it (well, for the next couple of hours). 
The disappearance of MTE_FEAT_ASYNC from MTE2 is potentially problematic but the worst that can happen is that async faults are simply not triggered (and TBH, those "faults" were not that useful anyway). MTE4 without ASYM is defined in a weird way. Basically there's no such thing as MTE4, just 2 and 3 (the latter bringing in ASYM) with some extra features like store-only, stage 2 permission, canonical tag checking. I don't think any of these new MTE extensions add any state that KVM should care to context-switch, so we should be fine. Does KVM limit the maximum value of the ID field exposed to userspace? Some future MTE9 may add new state, so better to be safe (I thought we handled these cases but can't find it now). It's also probably safe to disable MTE altogether if there's any difference between all these fields on different CPUs (I don't think we currently do, we just go for lower safe while ignoring MTE_frac, MTEX). Regarding MTEX, I don't think Linux would ever make use of the canonical tag checking. The enabling bit is unfortunately in TCR_EL1 which we don't context-switch (and may be cached in the TLB, I haven't checked the latest spec).
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7b0f43373dbe..861997a14ba1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -226,9 +226,7 @@ struct kvm_arch { */ #define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0 /* Memory Tagging Extension enabled for the guest */ -#define KVM_ARCH_FLAG_MTE_ENABLED 1 - /* At least one vCPU has ran in the VM */ -#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 +#define KVM_ARCH_FLAG_HAS_RAN_ONCE 1 /* * The following two bits are used to indicate the guest's EL1 * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT @@ -236,22 +234,22 @@ struct kvm_arch { * Otherwise, the guest's EL1 register width has not yet been * determined yet. */ -#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 -#define KVM_ARCH_FLAG_EL1_32BIT 4 +#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 2 +#define KVM_ARCH_FLAG_EL1_32BIT 3 /* PSCI SYSTEM_SUSPEND enabled for the guest */ -#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 +#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4 /* VM counter offset */ -#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 +#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5 /* Timer PPIs made immutable */ -#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 /* SMCCC filter initialized for the VM */ -#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8 +#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7 /* * AA64DFR0_EL1.PMUver was set as ID_AA64DFR0_EL1_PMUVer_IMP_DEF * or DFR0_EL1.PerfMon was set as ID_DFR0_EL1_PerfMon_IMPDEF from * userspace for VCPUs without PMU. 
*/ -#define KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU 9 +#define KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU 8 unsigned long flags; @@ -1112,7 +1110,8 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_has_mte(kvm) \ (system_supports_mte() && \ - test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) + FIELD_GET(ID_AA64PFR1_EL1_MTE_MASK, \ + IDREG(kvm, SYS_ID_AA64PFR1_EL1))) #define kvm_supports_32bit_el0() \ (system_supports_32bit_el0() && \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ca18c09ccf82..6fc4190559d1 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -80,8 +80,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (!system_supports_mte() || kvm->created_vcpus) { r = -EINVAL; } else { + u64 val; + + /* Protects the idregs against modification */ + mutex_lock(&kvm->arch.config_lock); + + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_MASK, 1); + IDREG(kvm, SYS_ID_AA64PFR1_EL1) = val; + + mutex_unlock(&kvm->arch.config_lock); r = 0; - set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); } mutex_unlock(&kvm->lock); break; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 59f8adda47fa..8cffb82dd10d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3672,6 +3672,11 @@ void kvm_arm_init_id_regs(struct kvm *kvm) idreg++; id = reg_to_encoding(idreg); } + + /* MTE disabled by default even when supported */ + val = IDREG(kvm, SYS_ID_AA64PFR1_EL1); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); + IDREG(kvm, SYS_ID_AA64PFR1_EL1) = val; } int __init kvm_sys_reg_table_init(void)
With per guest ID registers, MTE settings from userspace can be stored in its corresponding ID register. No functional change intended. Signed-off-by: Suraj Jitindar Singh <surajjs@amazon.com> --- arch/arm64/include/asm/kvm_host.h | 21 ++++++++++----------- arch/arm64/kvm/arm.c | 11 ++++++++++- arch/arm64/kvm/sys_regs.c | 5 +++++ 3 files changed, 25 insertions(+), 12 deletions(-)