diff mbox series

[RFC,v3,18/59] KVM: x86: Add flag to mark TSC as immutable (for TDX)

Message ID 00772535f09b2bf98e6bc7008e81c6ffb381ed84.1637799475.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: X86: TDX support | expand

Commit Message

Isaku Yamahata Nov. 25, 2021, 12:20 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

The TSC for TDX1 guests is fixed at TD creation time.  Add tsc_immutable
to reflect that the TSC of the guest cannot be changed in any way, and
use it to short circuit all paths that lead to one of the myriad TSC
adjustment flows.

Suggested-by: Kai Huang <kai.huang@linux.intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 35 +++++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

Comments

Thomas Gleixner Nov. 25, 2021, 7:40 p.m. UTC | #1
On Wed, Nov 24 2021 at 16:20, isaku yamahata wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> The TSC for TDX1 guests is fixed at TD creation time.  Add tsc_immutable

What's a TDX1 guest?

> to reflect that the TSC of the guest cannot be changed in any way, and
> use it to short circuit all paths that lead to one of the myriad TSC
> adjustment flows.

I can kinda see the reason for this being valuable on it's own, but in
general why does TDX need a gazillion flags to disable tons of different
things if _ALL_ these flags are going to be set by for TDX guests
anyway?

Seperate flags make only sense when they have a value on their own,
i.e. are useful for things outside of TDX. If not they are just useless
ballast.

Thanks,

        tglx
Sean Christopherson Nov. 29, 2021, 6:05 p.m. UTC | #2
On Thu, Nov 25, 2021, Thomas Gleixner wrote:
> On Wed, Nov 24 2021 at 16:20, isaku yamahata wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> >
> > The TSC for TDX1 guests is fixed at TD creation time.  Add tsc_immutable
> 
> What's a TDX1 guest?

The "revision 1.0" version of TDX.  Some of these patches use "TDX1" to identify
behaviors that may not hold true in future iterations of TDX, and also to highlight
things that are dictated by the spec, e.g. some of the guest TSC frequency values.
For this patch in particular, there's probably no need to differentiate TDX1 vs. TDX,
the qualification was more for cases where KVM needs to define magic values to adhere
to the spec, e.g. to make it clear that the magic values aren't made up by KVM.

> > to reflect that the TSC of the guest cannot be changed in any way, and
> > use it to short circuit all paths that lead to one of the myriad TSC
> > adjustment flows.
> 
> I can kinda see the reason for this being valuable on it's own, but in
> general why does TDX need a gazillion flags to disable tons of different
> things if _ALL_ these flags are going to be set by for TDX guests
> anyway?
> 
> Seperate flags make only sense when they have a value on their own,
> i.e. are useful for things outside of TDX. If not they are just useless
> ballast.

SEV-SNP and TDX have different, but overlapping, restrictions.  And SEV-ES also
shares most SEV-SNP's restrictions.  TDX guests that can be debugged and/or profiled
also have different restrictions, though I forget if any of these flags would be
affected.

The goal with individual flags is to avoid seemingly arbitrary is_snp_guest() and
is_tdx_guest() checks throughout common x86 code, e.g. to avoid confusion over why
KVM does X for TDX but Y for SNP.  And I personally find it easer to audit KVM
behavior with respect to the SNP/TDX specs if the non-obvious restrictions are
explicitly set when the VM is created.

For some of the flags, there's also hope that future iterations of TDX will remove
some of the restrictions, though that's more of a bonus than a direct justification
for adding individual flags.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e912e1e853ef..f3808672c720 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1122,6 +1122,7 @@  struct kvm_arch {
 	int audit_point;
 	#endif
 
+	bool tsc_immutable;
 	bool backwards_tsc_observed;
 	bool boot_vcpu_runs_old_kvmclock;
 	u32 bsp_vcpu_id;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fefa4602e879..0ebd60846079 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2247,7 +2247,9 @@  static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 	u64 ratio;
 
 	/* Guest TSC same frequency as host TSC? */
-	if (!scale) {
+	if (!scale || vcpu->kvm->arch.tsc_immutable) {
+		if (scale)
+			pr_warn_ratelimited("Guest TSC immutable, scaling not supported\n");
 		kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
 		return 0;
 	}
@@ -2534,6 +2536,9 @@  static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 	bool matched = false;
 	bool synchronizing = false;
 
+	if (WARN_ON_ONCE(vcpu->kvm->arch.tsc_immutable))
+		return;
+
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
 	offset = kvm_compute_l1_tsc_offset(vcpu, data);
 	ns = get_kvmclock_base_ns();
@@ -2960,6 +2965,10 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	u8 pvclock_flags;
 	bool use_master_clock;
 
+	/* Unable to update guest time if the TSC is immutable. */
+	if (ka->tsc_immutable)
+		return 0;
+
 	kernel_ns = 0;
 	host_tsc = 0;
 
@@ -4372,7 +4381,8 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		if (tsc_delta < 0)
 			mark_tsc_unstable("KVM discovered backwards TSC");
 
-		if (kvm_check_tsc_unstable()) {
+		if (kvm_check_tsc_unstable() &&
+		    !vcpu->kvm->arch.tsc_immutable) {
 			u64 offset = kvm_compute_l1_tsc_offset(vcpu,
 						vcpu->arch.last_guest_tsc);
 			kvm_vcpu_write_tsc_offset(vcpu, offset);
@@ -4386,7 +4396,8 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * On a host with synchronized TSC, there is no need to update
 		 * kvmclock on vcpu->cpu migration
 		 */
-		if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
+		if ((!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) &&
+		    !vcpu->kvm->arch.tsc_immutable)
 			kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 		if (vcpu->cpu != cpu)
 			kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
@@ -5352,10 +5363,11 @@  long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_SET_TSC_KHZ: {
-		u32 user_tsc_khz;
+		u32 user_tsc_khz = (u32)arg;
 
 		r = -EINVAL;
-		user_tsc_khz = (u32)arg;
+		if (vcpu->kvm->arch.tsc_immutable)
+			goto out;
 
 		if (kvm_has_tsc_control &&
 		    user_tsc_khz >= kvm_max_guest_tsc_khz)
@@ -10994,9 +11006,12 @@  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 	if (mutex_lock_killable(&vcpu->mutex))
 		return;
-	vcpu_load(vcpu);
-	kvm_synchronize_tsc(vcpu, 0);
-	vcpu_put(vcpu);
+
+	if (!kvm->arch.tsc_immutable) {
+		vcpu_load(vcpu);
+		kvm_synchronize_tsc(vcpu, 0);
+		vcpu_put(vcpu);
+	}
 
 	/* poll control enabled by default */
 	vcpu->arch.msr_kvm_poll_control = 1;
@@ -11253,6 +11268,10 @@  int kvm_arch_hardware_enable(void)
 	if (backwards_tsc) {
 		u64 delta_cyc = max_tsc - local_tsc;
 		list_for_each_entry(kvm, &vm_list, vm_list) {
+			if (vcpu->kvm->arch.tsc_immutable) {
+				pr_warn_ratelimited("Backwards TSC observed and guest with immutable TSC active\n");
+				continue;
+			}
 			kvm->arch.backwards_tsc_observed = true;
 			kvm_for_each_vcpu(i, vcpu, kvm) {
 				vcpu->arch.tsc_offset_adjustment += delta_cyc;