Message ID | 20230324144704.4193635-12-maz@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Rework timer offsetting for fun and profit | expand |
On Fri, Mar 24, 2023 at 02:46:57PM +0000, Marc Zyngier wrote: > Having the timer IRQs duplicated into each vcpu isn't great, and > becomes absolutely awful with NV. So let's move these into > the per-VM arch_timer_vm_data structure. > > This simplifies a lot of code, but requires us to introduce a > mutex so that we can reason about userspace trying to change > an interrupt number while another vcpu is running, something > that wasn't really well handled so far. > > Reviewed-by: Colton Lewis <coltonlewis@google.com> > Signed-off-by: Marc Zyngier <maz@kernel.org> > --- > arch/arm64/include/asm/kvm_host.h | 2 + > arch/arm64/kvm/arch_timer.c | 104 +++++++++++++++++------------- > arch/arm64/kvm/arm.c | 2 + > include/kvm/arm_arch_timer.h | 18 ++++-- > 4 files changed, 78 insertions(+), 48 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 116233a390e9..1280154c9ef3 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -223,6 +223,8 @@ struct kvm_arch { > #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 > /* VM counter offset */ > #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 > + /* Timer PPIs made immutable */ > +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 > > unsigned long flags; > > diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c > index 7cd0b0947454..88a38d45d352 100644 > --- a/arch/arm64/kvm/arch_timer.c > +++ b/arch/arm64/kvm/arch_timer.c > @@ -851,7 +851,6 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) > > hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); > ctxt->hrtimer.function = kvm_hrtimer_expire; > - timer_irq(ctxt) = default_ppi[timerid]; > > switch (timerid) { > case TIMER_PTIMER: > @@ -880,6 +879,13 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) > timer->bg_timer.function = kvm_bg_timer_expire; > } > > +void kvm_timer_init_vm(struct kvm *kvm) > +{ > + mutex_init(&kvm->arch.timer_data.lock); > + 
for (int i = 0; i < NR_KVM_TIMERS; i++) > + kvm->arch.timer_data.ppi[i] = default_ppi[i]; > +} > + > void kvm_timer_cpu_up(void) > { > enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); > @@ -1292,44 +1298,52 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) > > static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) > { > - int vtimer_irq, ptimer_irq, ret; > - unsigned long i; > + u32 ppis = 0; > > - vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); > - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); > - if (ret) > - return false; > + mutex_lock(&vcpu->kvm->arch.timer_data.lock); > > - ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); > - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); > - if (ret) > - return false; > + for (int i = 0; i < NR_KVM_TIMERS; i++) { > + struct arch_timer_context *ctx; > + int irq; > > - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { > - if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || > - timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) > - return false; > + ctx = vcpu_get_timer(vcpu, i); > + irq = timer_irq(ctx); > + if (kvm_vgic_set_owner(vcpu, irq, ctx)) > + break; > + > + /* > + * We know by construction that we only have PPIs, so > + * all values are less than 32. > + */ > + ppis |= BIT(irq); > } > > - return true; > + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); > + > + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); > + > + return hweight32(ppis) == NR_KVM_TIMERS; Does it make sense to only set the IMMUTABLE flag if the timer IRQs are indeed valid? I doubt userspace would do anything when it gets the EINVAL, but it is possible userspace could make another attempt at configuring the IRQs correctly. I believe that was the existing behavior of the UAPI.
On Thu, 30 Mar 2023 08:02:13 +0100, Oliver Upton <oliver.upton@linux.dev> wrote: > > On Fri, Mar 24, 2023 at 02:46:57PM +0000, Marc Zyngier wrote: > > Having the timer IRQs duplicated into each vcpu isn't great, and > > becomes absolutely awful with NV. So let's move these into > > the per-VM arch_timer_vm_data structure. > > > > This simplifies a lot of code, but requires us to introduce a > > mutex so that we can reason about userspace trying to change > > an interrupt number while another vcpu is running, something > > that wasn't really well handled so far. > > > > Reviewed-by: Colton Lewis <coltonlewis@google.com> > > Signed-off-by: Marc Zyngier <maz@kernel.org> > > --- > > arch/arm64/include/asm/kvm_host.h | 2 + > > arch/arm64/kvm/arch_timer.c | 104 +++++++++++++++++------------- > > arch/arm64/kvm/arm.c | 2 + > > include/kvm/arm_arch_timer.h | 18 ++++-- > > 4 files changed, 78 insertions(+), 48 deletions(-) > > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index 116233a390e9..1280154c9ef3 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -223,6 +223,8 @@ struct kvm_arch { > > #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 > > /* VM counter offset */ > > #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 > > + /* Timer PPIs made immutable */ > > +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 > > > > unsigned long flags; > > > > diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c > > index 7cd0b0947454..88a38d45d352 100644 > > --- a/arch/arm64/kvm/arch_timer.c > > +++ b/arch/arm64/kvm/arch_timer.c > > @@ -851,7 +851,6 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) > > > > hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); > > ctxt->hrtimer.function = kvm_hrtimer_expire; > > - timer_irq(ctxt) = default_ppi[timerid]; > > > > switch (timerid) { > > case TIMER_PTIMER: > > @@ -880,6 +879,13 @@ void 
kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) > > timer->bg_timer.function = kvm_bg_timer_expire; > > } > > > > +void kvm_timer_init_vm(struct kvm *kvm) > > +{ > > + mutex_init(&kvm->arch.timer_data.lock); > > + for (int i = 0; i < NR_KVM_TIMERS; i++) > > + kvm->arch.timer_data.ppi[i] = default_ppi[i]; > > +} > > + > > void kvm_timer_cpu_up(void) > > { > > enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); > > @@ -1292,44 +1298,52 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) > > > > static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) > > { > > - int vtimer_irq, ptimer_irq, ret; > > - unsigned long i; > > + u32 ppis = 0; > > > > - vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); > > - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); > > - if (ret) > > - return false; > > + mutex_lock(&vcpu->kvm->arch.timer_data.lock); > > > > - ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); > > - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); > > - if (ret) > > - return false; > > + for (int i = 0; i < NR_KVM_TIMERS; i++) { > > + struct arch_timer_context *ctx; > > + int irq; > > > > - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { > > - if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || > > - timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) > > - return false; > > + ctx = vcpu_get_timer(vcpu, i); > > + irq = timer_irq(ctx); > > + if (kvm_vgic_set_owner(vcpu, irq, ctx)) > > + break; > > + > > + /* > > + * We know by construction that we only have PPIs, so > > + * all values are less than 32. > > + */ > > + ppis |= BIT(irq); > > } > > > > - return true; > > + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); > > + > > + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); > > + > > + return hweight32(ppis) == NR_KVM_TIMERS; > > Does it make sense to only set the IMMUTABLE flag if the timer IRQs are > indeed valid? 
I doubt userspace would do anything when it gets the > EINVAL, but it is possible userspace could make another attempt at > configuring the IRQs correctly.

Yup, that's fair enough. I'll flip things around.

Thanks!

M.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 116233a390e9..1280154c9ef3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -223,6 +223,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 /* VM counter offset */ #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 + /* Timer PPIs made immutable */ +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 unsigned long flags; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 7cd0b0947454..88a38d45d352 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -851,7 +851,6 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -880,6 +879,13 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer->bg_timer.function = kvm_bg_timer_expire; } +void kvm_timer_init_vm(struct kvm *kvm) +{ + mutex_init(&kvm->arch.timer_data.lock); + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm->arch.timer_data.ppi[i] = default_ppi[i]; +} + void kvm_timer_cpu_up(void) { enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); @@ -1292,44 +1298,52 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) { - int vtimer_irq, ptimer_irq, ret; - unsigned long i; + u32 ppis = 0; - vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); - if (ret) - return false; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); - ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); - if (ret) - return false; + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + int irq; - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (timer_irq(vcpu_vtimer(vcpu)) 
!= vtimer_irq || - timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) - return false; + ctx = vcpu_get_timer(vcpu, i); + irq = timer_irq(ctx); + if (kvm_vgic_set_owner(vcpu, irq, ctx)) + break; + + /* + * We know by construction that we only have PPIs, so + * all values are less than 32. + */ + ppis |= BIT(irq); } - return true; + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); + + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + + return hweight32(ppis) == NR_KVM_TIMERS; } bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); - struct arch_timer_context *timer; if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == timer_irq(vcpu_vtimer(vcpu))) - timer = vcpu_vtimer(vcpu); - else if (vintid == timer_irq(vcpu_ptimer(vcpu))) - timer = vcpu_ptimer(vcpu); - else - BUG(); + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + + ctx = vcpu_get_timer(vcpu, i); + if (timer_irq(ctx) == vintid) + return kvm_timer_should_fire(ctx); + } - return kvm_timer_should_fire(timer); + /* A timer IRQ has fired, but no matching timer was found? 
*/ + WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid); + + return false; } int kvm_timer_enable(struct kvm_vcpu *vcpu) @@ -1385,23 +1399,10 @@ void kvm_timer_init_vhe(void) sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } -static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; - timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; - } -} - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int __user *uaddr = (int __user *)(long)attr->addr; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - int irq; + int irq, idx, ret = 0; if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; @@ -1412,21 +1413,36 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq))) return -EINVAL; - if (vcpu->arch.timer_cpu.enabled) - return -EBUSY; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); + + if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, + &vcpu->kvm->arch.flags)) { + ret = -EBUSY; + goto out; + } switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); + idx = TIMER_VTIMER; break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); + idx = TIMER_PTIMER; break; default: - return -ENXIO; + ret = -ENXIO; + goto out; } - return 0; + /* + * We cannot validate the IRQ unicity before we run, so take it at + * face value. The verdict will be given on first vcpu run, for each + * vcpu. Yes this is late. Blame it on the stupid API. 
+ */ + vcpu->kvm->arch.timer_data.ppi[idx] = irq; + +out: + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + return ret; } int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1c8a4bbae684..4c5e9dfbf83a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -148,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); + kvm_timer_init_vm(kvm); + /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->max_vcpus = kvm_arm_default_max_vcpus(); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 27cada09f588..f093ea9f540d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -36,14 +36,16 @@ struct arch_timer_vm_data { u64 voffset; /* Offset applied to the physical timer/counter */ u64 poffset; + + struct mutex lock; + + /* The PPI for each timer, global to the VM */ + u8 ppi[NR_KVM_TIMERS]; }; struct arch_timer_context { struct kvm_vcpu *vcpu; - /* Timer IRQ */ - struct kvm_irq_level irq; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -57,6 +59,11 @@ struct arch_timer_context { */ bool loaded; + /* Output level of the timer IRQ */ + struct { + bool level; + } irq; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -86,6 +93,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); +void kvm_timer_init_vm(struct kvm *kvm); + u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); @@ -109,7 +118,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) -#define timer_irq(ctx) ((ctx)->irq.irq) +#define timer_vm_data(ctx) 
(&(ctx)->vcpu->kvm->arch.timer_data) +#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr,