Message ID | 20231101112934.631344-1-paul@xen.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT | expand |
On Wed, Nov 01, 2023, Paul Durrant wrote: > @@ -3231,12 +3245,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) > vcpu->hv_clock.flags = pvclock_flags; > > if (vcpu->pv_time.active) > - kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0); > + kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false); > + > if (vcpu->xen.vcpu_info_cache.active) > kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache, > - offsetof(struct compat_vcpu_info, time)); > + offsetof(struct compat_vcpu_info, time), > + xen_pvclock_tsc_unstable); > if (vcpu->xen.vcpu_time_info_cache.active) > - kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0); > + kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0, > + xen_pvclock_tsc_unstable); > kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); > return 0; Please rebase, this conflicts with commit ee11ab6bb04e ("KVM: X86: Reduce size of kvm_vcpu_arch structure when CONFIG_KVM_XEN=n"). I can solve the conflict, but I really shouldn't have to. Also, your version of git should support --base, which makes life much easier for others when non-trivial conflicts are encountered. From maintainer-kvm-x86.rst: : Git Base : ~~~~~~~~ : If you are using git version 2.9.0 or later (Googlers, this is all of you!), : use ``git format-patch`` with the ``--base`` flag to automatically include the : base tree information in the generated patches. : : Note, ``--base=auto`` works as expected if and only if a branch's upstream is : set to the base topic branch, e.g. it will do the wrong thing if your upstream : is set to your personal repository for backup purposes. An alternative "auto" : solution is to derive the names of your development branches based on their : KVM x86 topic, and feed that into ``--base``. E.g. ``x86/pmu/my_branch_name``, : and then write a small wrapper to extract ``pmu`` from the current branch name : to yield ``--base=x/pmu``, where ``x`` is whatever name your repository uses to : track the KVM x86 remote. 
> @@ -4531,7 +4548,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | > KVM_XEN_HVM_CONFIG_SHARED_INFO | > KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL | > - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; > + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | > + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; > if (sched_info_on()) > r |= KVM_XEN_HVM_CONFIG_RUNSTATE | > KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG; > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c > index 40edf4d1974c..7699d94f190b 100644 > --- a/arch/x86/kvm/xen.c > +++ b/arch/x86/kvm/xen.c > @@ -1111,9 +1111,12 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) > > int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) > { > + bool update_pvclock = false; > + > /* Only some feature flags need to be *enabled* by userspace */ > u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | > - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; > + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | > + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; > > if (xhc->flags & ~permitted_flags) > return -EINVAL; > @@ -1134,9 +1137,19 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) > else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) > static_branch_slow_dec_deferred(&kvm_xen_enabled); > > + if ((kvm->arch.xen_hvm_config.flags & > + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) != > + (xhc->flags & > + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)) > + update_pvclock = true; Rather than a boolean and the above, which is a bit hard to parse, what about taking a snapshot of the old flags and then doing an XOR? /* Only some feature flags need to be *enabled* by userspace */ u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_EVTCHN_SEND | KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; u32 old_flags; if (xhc->flags & ~permitted_flags) return -EINVAL; /* * With hypercall interception the kernel generates its own * hypercall page so it must not be provided. 
*/ if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) && (xhc->blob_addr_32 || xhc->blob_addr_64 || xhc->blob_size_32 || xhc->blob_size_64)) return -EINVAL; mutex_lock(&kvm->arch.xen.xen_lock); if (xhc->msr && !kvm->arch.xen_hvm_config.msr) static_branch_inc(&kvm_xen_enabled.key); else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); old_flags = kvm->arch.xen_hvm_config.flags; memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); mutex_unlock(&kvm->arch.xen.xen_lock); if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); return 0;
On 01/11/2023 16:46, Sean Christopherson wrote: > On Wed, Nov 01, 2023, Paul Durrant wrote: >> @@ -3231,12 +3245,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) >> vcpu->hv_clock.flags = pvclock_flags; >> >> if (vcpu->pv_time.active) >> - kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0); >> + kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false); >> + >> if (vcpu->xen.vcpu_info_cache.active) >> kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache, >> - offsetof(struct compat_vcpu_info, time)); >> + offsetof(struct compat_vcpu_info, time), >> + xen_pvclock_tsc_unstable); >> if (vcpu->xen.vcpu_time_info_cache.active) >> - kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0); >> + kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0, >> + xen_pvclock_tsc_unstable); >> kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); >> return 0; > > Please rebase, this conflicts with commit ee11ab6bb04e ("KVM: X86: Reduce size of > kvm_vcpu_arch structure when CONFIG_KVM_XEN=n"). I can solve the conflict, but > I really shouldn't have to. > > Also, your version of git should support --base, which makes life much easier for > others when non-trivial conflicts are encountered. From maintainer-kvm-x86.rst: > > : Git Base > : ~~~~~~~~ > : If you are using git version 2.9.0 or later (Googlers, this is all of you!), > : use ``git format-patch`` with the ``--base`` flag to automatically include the > : base tree information in the generated patches. > : > : Note, ``--base=auto`` works as expected if and only if a branch's upstream is > : set to the base topic branch, e.g. it will do the wrong thing if your upstream > : is set to your personal repository for backup purposes. An alternative "auto" > : solution is to derive the names of your development branches based on their > : KVM x86 topic, and feed that into ``--base``. E.g. 
``x86/pmu/my_branch_name``, > : and then write a small wrapper to extract ``pmu`` from the current branch name > : to yield ``--base=x/pmu``, where ``x`` is whatever name your repository uses to > : track the KVM x86 remote. > Ok, I'll sort that out. Thanks for the tip. >> @@ -4531,7 +4548,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) >> KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | >> KVM_XEN_HVM_CONFIG_SHARED_INFO | >> KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL | >> - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; >> + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | >> + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; >> if (sched_info_on()) >> r |= KVM_XEN_HVM_CONFIG_RUNSTATE | >> KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG; >> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c >> index 40edf4d1974c..7699d94f190b 100644 >> --- a/arch/x86/kvm/xen.c >> +++ b/arch/x86/kvm/xen.c >> @@ -1111,9 +1111,12 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) >> >> int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) >> { >> + bool update_pvclock = false; >> + >> /* Only some feature flags need to be *enabled* by userspace */ >> u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | >> - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; >> + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | >> + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; >> >> if (xhc->flags & ~permitted_flags) >> return -EINVAL; >> @@ -1134,9 +1137,19 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) >> else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) >> static_branch_slow_dec_deferred(&kvm_xen_enabled); >> >> + if ((kvm->arch.xen_hvm_config.flags & >> + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) != >> + (xhc->flags & >> + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)) >> + update_pvclock = true; > > Rather than a boolean and the above, which is a bit hard to parse, what about > taking a snapshot of the old flags and then doing an XOR? 
> > /* Only some feature flags need to be *enabled* by userspace */ > u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | > KVM_XEN_HVM_CONFIG_EVTCHN_SEND | > KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; > u32 old_flags; > > if (xhc->flags & ~permitted_flags) > return -EINVAL; > > /* > * With hypercall interception the kernel generates its own > * hypercall page so it must not be provided. > */ > if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) && > (xhc->blob_addr_32 || xhc->blob_addr_64 || > xhc->blob_size_32 || xhc->blob_size_64)) > return -EINVAL; > > mutex_lock(&kvm->arch.xen.xen_lock); > > if (xhc->msr && !kvm->arch.xen_hvm_config.msr) > static_branch_inc(&kvm_xen_enabled.key); > else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) > static_branch_slow_dec_deferred(&kvm_xen_enabled); > > old_flags = kvm->arch.xen_hvm_config.flags; > memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); > > mutex_unlock(&kvm->arch.xen.xen_lock); > > if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) > kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); > > return 0; Sure, I can do it that way if you prefer. Paul
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 21a7578142a1..9752a01270df 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8252,6 +8252,7 @@ PVHVM guests. Valid flags are:: #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) + #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG ioctl is available, for the guest to set its hypercall page. @@ -8295,6 +8296,11 @@ behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless specifically enabled (by the guest making the hypercall, causing the VMM to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute). +The KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag indicates that KVM supports +clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be +done when the KVM_XEN_HVM_CONFIG ioctl sets the +KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag. 
+ 8.31 KVM_CAP_PPC_MULTITCE ------------------------- diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41cce5031126..4ad17ad0fc0c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3096,7 +3096,8 @@ u64 get_kvmclock_ns(struct kvm *kvm) static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, struct gfn_to_pfn_cache *gpc, - unsigned int offset) + unsigned int offset, + bool force_tsc_unstable) { struct kvm_vcpu_arch *vcpu = &v->arch; struct pvclock_vcpu_time_info *guest_hv_clock; @@ -3133,6 +3134,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, } memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock)); + + if (force_tsc_unstable) + guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT; + smp_wmb(); guest_hv_clock->version = ++vcpu->hv_clock.version; @@ -3154,6 +3159,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) u8 pvclock_flags; bool use_master_clock; + /* + * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless + * explicitly told to use TSC as its clocksource Xen will not set this bit. + * This default behaviour led to bugs in some guest kernels which cause + * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags. 
+ */ + bool xen_pvclock_tsc_unstable = + ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; + kernel_ns = 0; host_tsc = 0; @@ -3231,12 +3245,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; if (vcpu->pv_time.active) - kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0); + kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false); + if (vcpu->xen.vcpu_info_cache.active) kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache, - offsetof(struct compat_vcpu_info, time)); + offsetof(struct compat_vcpu_info, time), + xen_pvclock_tsc_unstable); if (vcpu->xen.vcpu_time_info_cache.active) - kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0); + kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0, + xen_pvclock_tsc_unstable); kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); return 0; } @@ -4531,7 +4548,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO | KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL | - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; if (sched_info_on()) r |= KVM_XEN_HVM_CONFIG_RUNSTATE | KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 40edf4d1974c..7699d94f190b 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1111,9 +1111,12 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) { + bool update_pvclock = false; + /* Only some feature flags need to be *enabled* by userspace */ u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; if (xhc->flags & ~permitted_flags) return -EINVAL; @@ -1134,9 +1137,19 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) else if (!xhc->msr && 
kvm->arch.xen_hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); + if ((kvm->arch.xen_hvm_config.flags & + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) != + (xhc->flags & + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)) + update_pvclock = true; + memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); mutex_unlock(&kvm->arch.xen.xen_lock); + + if (update_pvclock) + kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); + return 0; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 13065dd96132..e21b53e8358d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1282,6 +1282,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) struct kvm_xen_hvm_config { __u32 flags;