Message ID | 20210506103228.67864-5-ilstam@mailbox.org (mailing list archive)
---|---
State | New, archived
Series | KVM: VMX: Implement nested TSC scaling
On 06/05/21 12:32, ilstam@mailbox.org wrote:
> +	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
> +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
> +			vcpu->arch.tsc_offset = kvm_compute_02_tsc_offset(
> +					vcpu->arch.l1_tsc_offset,
> +					vmcs12->tsc_multiplier,
> +					vmcs12->tsc_offset);
> +
> +			vcpu->arch.tsc_scaling_ratio = mul_u64_u64_shr(
> +					vcpu->arch.tsc_scaling_ratio,
> +					vmcs12->tsc_multiplier,
> +					kvm_tsc_scaling_ratio_frac_bits);
> +		} else {
> +			vcpu->arch.tsc_offset += vmcs12->tsc_offset;
> +		}

The computation of vcpu->arch.tsc_offset is (not coincidentally) the same
one that appears in patch 6:

+	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) {
+		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
+			cur_offset = kvm_compute_02_tsc_offset(
+					l1_offset,
+					vmcs12->tsc_multiplier,
+					vmcs12->tsc_offset);
+		} else {
+			cur_offset = l1_offset + vmcs12->tsc_offset;

So I think you should just pass vmcs12 and the L1 offset to
kvm_compute_02_tsc_offset, and let it handle both cases (and possibly even
set vcpu->arch.tsc_scaling_ratio in the same function).

Paolo
On Thu, 2021-05-06 at 13:32 +0200, Paolo Bonzini wrote:
> On 06/05/21 12:32, ilstam@mailbox.org wrote:
> > +	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
> > +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
> > +			vcpu->arch.tsc_offset = kvm_compute_02_tsc_offset(
> > +					vcpu->arch.l1_tsc_offset,
> > +					vmcs12->tsc_multiplier,
> > +					vmcs12->tsc_offset);
> > +
> > +			vcpu->arch.tsc_scaling_ratio = mul_u64_u64_shr(
> > +					vcpu->arch.tsc_scaling_ratio,
> > +					vmcs12->tsc_multiplier,
> > +					kvm_tsc_scaling_ratio_frac_bits);
> > +		} else {
> > +			vcpu->arch.tsc_offset += vmcs12->tsc_offset;
> > +		}
>
> The computation of vcpu->arch.tsc_offset is (not coincidentally) the
> same one that appears in patch 6:
>
> +	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) {
> +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
> +			cur_offset = kvm_compute_02_tsc_offset(
> +					l1_offset,
> +					vmcs12->tsc_multiplier,
> +					vmcs12->tsc_offset);
> +		} else {
> +			cur_offset = l1_offset + vmcs12->tsc_offset;
>
> So I think you should just pass vmcs12 and the L1 offset to
> kvm_compute_02_tsc_offset, and let it handle both cases (and possibly
> even set vcpu->arch.tsc_scaling_ratio in the same function).
>
> Paolo

That was my thinking initially too. However, kvm_compute_02_tsc_offset is
defined in x86.c, which is vmx-agnostic, and hence 'struct vmcs12' is not
defined there. The way it is now, that function can be re-used by svm code
in case we add AMD support later.

I could try to define a wrapper in vmx.c instead that calls
kvm_compute_02_tsc_offset, and move all of this code there. However, I
think this requires many more changes to the vmx and svm
write_l1_tsc_offset functions and to their caller. I am looking at it now.

Ilias
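[For readers following the discussion, a minimal sketch of the kind of vmx.c
wrapper mentioned above; the function name vmx_compute_02_tsc_offset and its
exact placement are illustrative assumptions, not code from the posted
series:]

/*
 * Sketch only: a VMX-local wrapper that knows about struct vmcs12 and
 * forwards the arithmetic to the generic helper that lives in x86.c.
 */
static u64 vmx_compute_02_tsc_offset(struct kvm_vcpu *vcpu,
				     struct vmcs12 *vmcs12)
{
	if (!(vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
		return vcpu->arch.l1_tsc_offset;

	if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING)
		return kvm_compute_02_tsc_offset(vcpu->arch.l1_tsc_offset,
						 vmcs12->tsc_multiplier,
						 vmcs12->tsc_offset);

	return vcpu->arch.l1_tsc_offset + vmcs12->tsc_offset;
}

[Such a wrapper keeps struct vmcs12 knowledge inside vmx.c while leaving the
fixed-point arithmetic in the generic helper, which is exactly the trade-off
weighed in the follow-up messages.]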
On Thu, 2021-05-06 at 10:32 +0000, ilstam@mailbox.org wrote:
> From: Ilias Stamatis <ilstam@amazon.com>
>
> When L2 is entered we need to "merge" the TSC multiplier and TSC offset
> values of VMCS01 and VMCS12 and store the result into the current VMCS02.
>
> The 02 values are calculated using the following equations:
> offset_02 = ((offset_01 * mult_12) >> 48) + offset_12
> mult_02 = (mult_01 * mult_12) >> 48

I would mention that 48 is kvm_tsc_scaling_ratio_frac_bits instead.
Also maybe add the common code in a separate patch?

>
> Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/vmx/nested.c       | 26 ++++++++++++++++++++++----
>  arch/x86/kvm/x86.c              | 25 +++++++++++++++++++++++++
>  3 files changed, 48 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index cdddbf0b1177..e7a1eb36f95a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1780,6 +1780,7 @@ void kvm_define_user_return_msr(unsigned index, u32 msr);
>  int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
>
>  u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, bool l1);
> +u64 kvm_compute_02_tsc_offset(u64 l1_offset, u64 l2_multiplier, u64 l2_offset);
>  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
>
>  unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index bced76637823..a1bf28f33837 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -3353,8 +3353,22 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
>  	}
>
>  	enter_guest_mode(vcpu);
> -	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
> -		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
> +
> +	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
> +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
> +			vcpu->arch.tsc_offset = kvm_compute_02_tsc_offset(
> +					vcpu->arch.l1_tsc_offset,
> +					vmcs12->tsc_multiplier,
> +					vmcs12->tsc_offset);
> +
> +			vcpu->arch.tsc_scaling_ratio = mul_u64_u64_shr(
> +					vcpu->arch.tsc_scaling_ratio,
> +					vmcs12->tsc_multiplier,
> +					kvm_tsc_scaling_ratio_frac_bits);
> +		} else {
> +			vcpu->arch.tsc_offset += vmcs12->tsc_offset;
> +		}
> +	}
>
>  	if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
>  		exit_reason.basic = EXIT_REASON_INVALID_STATE;
> @@ -4454,8 +4468,12 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
>  	if (nested_cpu_has_preemption_timer(vmcs12))
>  		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
>
> -	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
> -		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
> +	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
> +		vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
> +
> +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING)
> +			vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
> +	}
>
>  	if (likely(!vmx->fail)) {
>  		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 26a4c0f46f15..87deb119c521 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2266,6 +2266,31 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
>  	return target_tsc - tsc;
>  }
>
> +/*
> + * This function computes the TSC offset that is stored in VMCS02 when entering
> + * L2 by combining the offset and multiplier values of VMCS01 and VMCS12.
> + */
> +u64 kvm_compute_02_tsc_offset(u64 l1_offset, u64 l2_multiplier, u64 l2_offset)
> +{
> +	u64 offset;
> +
> +	/*
> +	 * The L1 offset is interpreted as a signed number by the CPU and can
> +	 * be negative. So we extract the sign before the multiplication and
> +	 * put it back afterwards if needed.

If I understand correctly the reason for sign extraction is that we don't
have mul_s64_u64_shr. Maybe we should add it?

The pattern of (A * B) >> shift appears many times in TSC scaling.

So instead of this function maybe just use something like that?

merged_offset = l2_offset + mul_s64_u64_shr((s64) l1_offset,
					     l2_multiplier,
					     kvm_tsc_scaling_ratio_frac_bits)

Or another idea:

How about

u64 __kvm_scale_tsc_value(u64 value, u64 multiplier)
{
	return mul_u64_u64_shr(value,
			       multiplier,
			       kvm_tsc_scaling_ratio_frac_bits);
}

and

s64 __kvm_scale_tsc_offset(u64 value, u64 multiplier)
{
	return mul_s64_u64_shr((s64)value,
			       multiplier,
			       kvm_tsc_scaling_ratio_frac_bits);
}

And then use them in the code.

Overall though the code *looks* correct to me but I might have missed
something.

Best regards,
Maxim Levitsky

> +	 */
> +	offset = mul_u64_u64_shr(abs((s64) l1_offset),
> +				 l2_multiplier,
> +				 kvm_tsc_scaling_ratio_frac_bits);
> +
> +	if ((s64) l1_offset < 0)
> +		offset = -((s64) offset);
> +
> +	offset += l2_offset;
> +	return offset;
> +}
> +EXPORT_SYMBOL_GPL(kvm_compute_02_tsc_offset);
> +
>  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
>  {
>  	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc, true);
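[As an aside for readers less used to the 16.48 fixed-point format discussed
above, here is a small self-contained user-space sketch of the offset_02 /
mult_02 arithmetic. The local mul_u64_u64_shr() is written to mirror the
kernel helper of the same name, FRAC_BITS stands in for
kvm_tsc_scaling_ratio_frac_bits, and all input values are made up purely for
illustration:]

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define FRAC_BITS 48	/* kvm_tsc_scaling_ratio_frac_bits on VMX */

/* (a * b) >> shift with a 128-bit intermediate, like the kernel helper. */
static uint64_t mul_u64_u64_shr(uint64_t a, uint64_t b, unsigned int shift)
{
	return (uint64_t)(((unsigned __int128)a * b) >> shift);
}

int main(void)
{
	/* Made-up example: L1 scales the host TSC by 2.0, L2 by 0.5. */
	uint64_t mult_01 = 2ULL << FRAC_BITS;		/* ratio 2.0 */
	uint64_t mult_12 = 1ULL << (FRAC_BITS - 1);	/* ratio 0.5 */
	int64_t offset_01 = -1000;
	int64_t offset_12 = 300;

	/* mult_02 = (mult_01 * mult_12) >> 48  ->  ratio 1.0 */
	uint64_t mult_02 = mul_u64_u64_shr(mult_01, mult_12, FRAC_BITS);

	/*
	 * offset_02 = ((offset_01 * mult_12) >> 48) + offset_12, with the
	 * sign handled the way the patch does it: strip it, multiply,
	 * put it back.  Here: -500 + 300 = -200.
	 */
	uint64_t mag = mul_u64_u64_shr(
		(uint64_t)(offset_01 < 0 ? -offset_01 : offset_01),
		mult_12, FRAC_BITS);
	int64_t offset_02 = (offset_01 < 0 ? -(int64_t)mag : (int64_t)mag)
			    + offset_12;

	printf("mult_02 = %#" PRIx64 ", offset_02 = %" PRId64 "\n",
	       mult_02, offset_02);
	return 0;
}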
On 06/05/21 19:35, Stamatis, Ilias wrote:
> On Thu, 2021-05-06 at 13:32 +0200, Paolo Bonzini wrote:
>> On 06/05/21 12:32, ilstam@mailbox.org wrote:
>>> +	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
>>> +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
>>> +			vcpu->arch.tsc_offset = kvm_compute_02_tsc_offset(
>>> +					vcpu->arch.l1_tsc_offset,
>>> +					vmcs12->tsc_multiplier,
>>> +					vmcs12->tsc_offset);
>>> +
>>> +			vcpu->arch.tsc_scaling_ratio = mul_u64_u64_shr(
>>> +					vcpu->arch.tsc_scaling_ratio,
>>> +					vmcs12->tsc_multiplier,
>>> +					kvm_tsc_scaling_ratio_frac_bits);
>>> +		} else {
>>> +			vcpu->arch.tsc_offset += vmcs12->tsc_offset;
>>> +		}
>>
>> The computation of vcpu->arch.tsc_offset is (not coincidentally) the
>> same one that appears in patch 6:
>>
>> +	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) {
>> +		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
>> +			cur_offset = kvm_compute_02_tsc_offset(
>> +					l1_offset,
>> +					vmcs12->tsc_multiplier,
>> +					vmcs12->tsc_offset);
>> +		} else {
>> +			cur_offset = l1_offset + vmcs12->tsc_offset;
>>
>> So I think you should just pass vmcs12 and the L1 offset to
>> kvm_compute_02_tsc_offset, and let it handle both cases (and possibly
>> even set vcpu->arch.tsc_scaling_ratio in the same function).
>
> That was my thinking initially too. However, kvm_compute_02_tsc_offset
> is defined in x86.c, which is vmx-agnostic, and hence 'struct vmcs12'
> is not defined there.

Good point. Yeah, it's trading one code duplication for another. Right now
there's already code duplication in write_l1_tsc_offset, so it makes some
sense to limit duplication _within_ the file and lose a bit on duplicated
code across vmx/svm, but either way is okay.

Paolo
On Mon, 2021-05-10 at 16:53 +0300, Maxim Levitsky wrote:
> On Thu, 2021-05-06 at 10:32 +0000, ilstam@mailbox.org wrote:
> > From: Ilias Stamatis <ilstam@amazon.com>
> >
> > When L2 is entered we need to "merge" the TSC multiplier and TSC offset
> > values of VMCS01 and VMCS12 and store the result into the current VMCS02.
> >
> > The 02 values are calculated using the following equations:
> > offset_02 = ((offset_01 * mult_12) >> 48) + offset_12
> > mult_02 = (mult_01 * mult_12) >> 48
>
> I would mention that 48 is kvm_tsc_scaling_ratio_frac_bits instead.
> Also maybe add the common code in a separate patch?

Right, will do.

> > [...]
> >
> > +	/*
> > +	 * The L1 offset is interpreted as a signed number by the CPU and can
> > +	 * be negative. So we extract the sign before the multiplication and
> > +	 * put it back afterwards if needed.
>
> If I understand correctly the reason for sign extraction is that we don't
> have mul_s64_u64_shr. Maybe we should add it?

Initially I added a mul_s64_s64_shr and used that. You can take a look here:

https://github.com/ilstam/linux/commit/7bc0b75bb7b8d9bcecf41e2e1e1936880d3b93d1

I added mul_s64_s64_shr instead of mul_s64_u64_shr thinking that a) perhaps
it would be more useful as a general-purpose function and b) even though the
multiplier is unsigned, in reality it's capped so it should be safe to cast
it to signed. But then I realized that mul_u64_u64_shr, which we already
have, is enough and I can simply re-use that.

How would a mul_s64_u64_shr be implemented? I think in the end we need to
decide if we want to do signed or unsigned multiplication, and depending on
this we need to do a signed or unsigned right shift accordingly. So if we
did have a mul_s64_u64_shr, wouldn't it need to cast either of the arguments
and then use mul_u64_u64 (or mul_s64_s64), being just a wrapper essentially?
I don't think we can guarantee that casting either of the arguments is
always safe if we want to advertise this as a general-purpose function.

Thanks for the reviews!

Ilias

> [...]
On Mon, 2021-05-10 at 14:44 +0000, Stamatis, Ilias wrote:
> On Mon, 2021-05-10 at 16:53 +0300, Maxim Levitsky wrote:
> > On Thu, 2021-05-06 at 10:32 +0000, ilstam@mailbox.org wrote:
> > > [...]
> >
> > If I understand correctly the reason for sign extraction is that we
> > don't have mul_s64_u64_shr. Maybe we should add it?
>
> Initially I added a mul_s64_s64_shr and used that. You can take a look here:
>
> https://github.com/ilstam/linux/commit/7bc0b75bb7b8d9bcecf41e2e1e1936880d3b93d1
>
> I added mul_s64_s64_shr instead of mul_s64_u64_shr thinking that a) perhaps
> it would be more useful as a general-purpose function and b) even though
> the multiplier is unsigned, in reality it's capped so it should be safe to
> cast it to signed.

Actually, is it? Intel's PRM I think defines the TSC multiplier to be a full
64 bits. It is shifted right after multiplication, making the multiplier
effectively a fixed-point 16.48 number, but since the multiplication is done
first, it still has to use the full 64-bit value.

> But then I realized that mul_u64_u64_shr, which we already have, is enough
> and I can simply re-use that.
>
> How would a mul_s64_u64_shr be implemented?

I'll say even if it is implemented by just extracting the sign as you did
and then calling mul_u64_u64_shr, it could still be a bit better
documentation-wise.

> I think in the end we need to decide if we want to do signed or unsigned
> multiplication, and depending on this we need to do a signed or unsigned
> right shift accordingly.

I agree.

> So if we did have a mul_s64_u64_shr, wouldn't it need to cast either of
> the arguments and then use mul_u64_u64 (or mul_s64_s64), being just a
> wrapper essentially? I don't think we can guarantee that casting either of
> the arguments is always safe if we want to advertise this as a
> general-purpose function.

Note that I don't have a strong opinion on this whole thing, so if you feel
that it should be left as is, I don't mind.

> Thanks for the reviews!

No problem!

Best regards,
Maxim Levitsky
On Tue, 2021-05-11 at 15:38 +0300, Maxim Levitsky wrote:
> On Mon, 2021-05-10 at 14:44 +0000, Stamatis, Ilias wrote:
> > On Mon, 2021-05-10 at 16:53 +0300, Maxim Levitsky wrote:
> > > [...]
> >
> > I added mul_s64_s64_shr instead of mul_s64_u64_shr thinking that a)
> > perhaps it would be more useful as a general-purpose function and b)
> > even though the multiplier is unsigned, in reality it's capped so it
> > should be safe to cast it to signed.
>
> Actually, is it? Intel's PRM I think defines the TSC multiplier to be a
> full 64 bits. It is shifted right after multiplication, making the
> multiplier effectively a fixed-point 16.48 number, but since the
> multiplication is done first, it still has to use the full 64-bit value.

Oh, my bad. I remember I saw some max limit in the code, but that was for
the TSC frequency requested by userspace, not for the actual multiplier.

> > How would a mul_s64_u64_shr be implemented?
>
> I'll say even if it is implemented by just extracting the sign as you did
> and then calling mul_u64_u64_shr, it could still be a bit better
> documentation-wise.

That makes sense, I will do exactly this then. And I'll remove
kvm_compute_02_tsc_offset completely.

> Note that I don't have a strong opinion on this whole thing, so if you
> feel that it should be left as is, I don't mind.
>
> Best regards,
> Maxim Levitsky
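[To make the agreed direction concrete, here is a rough sketch of what a
sign-extracting mul_s64_u64_shr helper and its use at nested VM-entry could
look like. This is an assumption about the follow-up revision, not code from
a posted patch:]

/*
 * Sketch only: multiply a signed 64-bit value by an unsigned 64-bit
 * multiplier and shift the 128-bit product right.  Implemented, as
 * discussed above, by stripping the sign, calling the existing unsigned
 * helper, and restoring the sign afterwards.
 */
static inline u64 mul_s64_u64_shr(s64 a, u64 b, unsigned int shift)
{
	u64 ret;

	ret = mul_u64_u64_shr(abs(a), b, shift);
	if (a < 0)
		ret = -ret;
	return ret;
}

/*
 * With such a helper, the merged L2 offset at nested VM-entry collapses to
 * a single expression and kvm_compute_02_tsc_offset can go away:
 */
	vcpu->arch.tsc_offset = vmcs12->tsc_offset +
		mul_s64_u64_shr((s64)vcpu->arch.l1_tsc_offset,
				vmcs12->tsc_multiplier,
				kvm_tsc_scaling_ratio_frac_bits);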
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cdddbf0b1177..e7a1eb36f95a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1780,6 +1780,7 @@ void kvm_define_user_return_msr(unsigned index, u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, bool l1);
+u64 kvm_compute_02_tsc_offset(u64 l1_offset, u64 l2_multiplier, u64 l2_offset);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bced76637823..a1bf28f33837 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3353,8 +3353,22 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 	}
 
 	enter_guest_mode(vcpu);
-	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
-		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
+
+	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
+		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING) {
+			vcpu->arch.tsc_offset = kvm_compute_02_tsc_offset(
+					vcpu->arch.l1_tsc_offset,
+					vmcs12->tsc_multiplier,
+					vmcs12->tsc_offset);
+
+			vcpu->arch.tsc_scaling_ratio = mul_u64_u64_shr(
+					vcpu->arch.tsc_scaling_ratio,
+					vmcs12->tsc_multiplier,
+					kvm_tsc_scaling_ratio_frac_bits);
+		} else {
+			vcpu->arch.tsc_offset += vmcs12->tsc_offset;
+		}
+	}
 
 	if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
 		exit_reason.basic = EXIT_REASON_INVALID_STATE;
@@ -4454,8 +4468,12 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	if (nested_cpu_has_preemption_timer(vmcs12))
 		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
 
-	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
-		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) {
+		vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
+
+		if (vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_TSC_SCALING)
+			vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
+	}
 
 	if (likely(!vmx->fail)) {
 		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 26a4c0f46f15..87deb119c521 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2266,6 +2266,31 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
+/*
+ * This function computes the TSC offset that is stored in VMCS02 when entering
+ * L2 by combining the offset and multiplier values of VMCS01 and VMCS12.
+ */
+u64 kvm_compute_02_tsc_offset(u64 l1_offset, u64 l2_multiplier, u64 l2_offset)
+{
+	u64 offset;
+
+	/*
+	 * The L1 offset is interpreted as a signed number by the CPU and can
+	 * be negative. So we extract the sign before the multiplication and
+	 * put it back afterwards if needed.
+	 */
+	offset = mul_u64_u64_shr(abs((s64) l1_offset),
+				 l2_multiplier,
+				 kvm_tsc_scaling_ratio_frac_bits);
+
+	if ((s64) l1_offset < 0)
+		offset = -((s64) offset);
+
+	offset += l2_offset;
+	return offset;
+}
+EXPORT_SYMBOL_GPL(kvm_compute_02_tsc_offset);
+
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
 	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc, true);