@@ -523,10 +523,15 @@ struct kvm_vcpu_hv {
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
u64 hypercall_rip;
+ u32 current_runstate; /* RUNSTATE_* being accounted; indexes runstate time[] */
bool vcpu_info_set;
bool vcpu_time_info_set;
+ bool runstate_set; /* true once a runstate area has been registered */
struct gfn_to_hva_cache vcpu_info_cache;
struct gfn_to_hva_cache vcpu_time_info_cache;
+ struct gfn_to_hva_cache runstate_cache; /* cache for the registered runstate GPA */
+ u64 last_steal; /* sched_info.run_delay sampled on entering RUNSTATE_blocked */
+ u64 last_state_ns; /* ktime_get_ns() at registration or last runstate update */
};
struct kvm_vcpu_arch {
@@ -2946,6 +2946,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
struct kvm_steal_time *st;
+ if (vcpu->arch.xen.runstate_set) {
+ kvm_xen_setup_runstate_page(vcpu);
+ return;
+ }
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -3999,6 +4004,11 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
struct kvm_steal_time *st;
+ if (vcpu->arch.xen.runstate_set) {
+ kvm_xen_runstate_set_preempted(vcpu);
+ return;
+ }
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -11,9 +11,11 @@
#include "hyperv.h"
#include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
#include "trace.h"
@@ -56,6 +58,124 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
return 0;
}
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state, u64 steal_ns)
+{
+ struct kvm_vcpu_xen *vcpu_xen = &v->arch.xen;
+ struct vcpu_runstate_info runstate; /* only state_entry_time and time[] get filled in */
+ unsigned int offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+ u64 now, delta;
+
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+#ifdef CONFIG_X86_64
+ /*
+ * The only difference between the native and compat layouts is
+ * the alignment of uint64_t in 32-bit. The first field 'state'
+ * sits at the same offset in both, while 'state_entry_time' and
+ * 'time' are 4 bytes further in in the native layout (asserted
+ * just below), so 'offset' is switched to the native value here.
+ *
+ * NOTE(review): the layout is chosen by host config alone; nothing
+ * in this function checks whether the guest itself expects the
+ * 32-bit layout. Confirm that is intended.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+ offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+ /*
+ * Although it's called "state_entry_time" and explicitly documented
+ * as being "the system time at which the VCPU was last scheduled to
+ * run", Xen just treats it as a counter for HVM domains too.
+ *
+ * Overall sequence of this function: read the guest's current
+ * state_entry_time plus the four time[] counters that directly
+ * follow it (5 x u64) into the local 'runstate'; write back
+ * state_entry_time incremented and with XEN_RUNSTATE_UPDATE set;
+ * write the new 'state'; charge the elapsed time to the old state
+ * (and 'steal_ns' to RUNSTATE_runnable) and write time[]; finally
+ * write state_entry_time again with XEN_RUNSTATE_UPDATE cleared.
+ * If any guest access fails the function simply returns, leaving
+ * whatever was already written in place.
+ */
+ if (kvm_read_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &runstate.state_entry_time, offset,
+ sizeof(u64) * 5))
+ return;
+
+ runstate.state_entry_time = XEN_RUNSTATE_UPDATE |
+ (runstate.state_entry_time + 1);
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &runstate.state_entry_time, offset,
+ sizeof(u64)))
+ return;
+ smp_wmb(); /* order the XEN_RUNSTATE_UPDATE store before the state/time[] stores */
+
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+ offsetof(struct compat_vcpu_runstate_info, state));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &state,
+ offsetof(struct vcpu_runstate_info, state),
+ sizeof(runstate.state)))
+ return;
+
+ now = ktime_get_ns();
+ delta = now - vcpu_xen->last_state_ns - steal_ns; /* NOTE(review): u64; wraps if steal_ns exceeds the elapsed time */
+ runstate.time[vcpu_xen->current_runstate] += delta; /* charge the state being left */
+ if (steal_ns)
+ runstate.time[RUNSTATE_runnable] += steal_ns;
+
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &runstate.time[0],
+ offset + sizeof(u64),
+ sizeof(runstate.time)))
+ return;
+ smp_wmb(); /* order the state/time[] stores before clearing XEN_RUNSTATE_UPDATE */
+ vcpu_xen->current_runstate = state; /* host-side bookkeeping, updated only after time[] was written */
+ vcpu_xen->last_state_ns = now;
+
+ runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &runstate.state_entry_time, offset,
+ sizeof(u64)))
+ return;
+}
+
+/*
+ * Move a vCPU out of the running state in its Xen runstate area.
+ *
+ * Invoked from kvm_steal_time_set_preempted() when a runstate area has been
+ * registered. A preempted vCPU is reported as RUNSTATE_runnable; otherwise
+ * it is reported as RUNSTATE_blocked and the scheduler's run_delay is
+ * sampled so that kvm_xen_setup_runstate_page() can later work out how much
+ * of the blocked period was really spent runnable.
+ */
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *v)
+{
+	int next_state = RUNSTATE_runnable;
+
+	/* The native and compat layouts must agree on size/placement of 'state'. */
+	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+		     offsetof(struct compat_vcpu_runstate_info, state));
+	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+		     sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+
+	if (!v->preempted) {
+		/* Not preempted: treat as blocked and remember run_delay. */
+		next_state = RUNSTATE_blocked;
+		v->arch.xen.last_steal = current->sched_info.run_delay;
+	}
+
+	/* No steal time is charged on the way out. */
+	kvm_xen_update_runstate(v, next_state, 0);
+}
+
+/*
+ * Mark a vCPU as RUNSTATE_running in its Xen runstate area.
+ *
+ * Invoked from record_steal_time() when a runstate area has been registered.
+ */
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *v)
+{
+	u64 runnable_ns;
+
+	/*
+	 * A vCPU that was last recorded as blocked must have become
+	 * runnable at some point, since it is about to run again. The
+	 * growth in the scheduler's run_delay since it blocked is taken
+	 * as that runnable time; kvm_xen_update_runstate() subtracts it
+	 * from the blocked time and credits it to RUNSTATE_runnable.
+	 */
+	if (v->arch.xen.current_runstate != RUNSTATE_blocked)
+		runnable_ns = 0;
+	else
+		runnable_ns = current->sched_info.run_delay -
+			      v->arch.xen.last_steal;
+
+	kvm_xen_update_runstate(v, RUNSTATE_running, runnable_ns);
+}
+
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
struct kvm_vcpu *v;
@@ -78,7 +198,6 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
v = kvm_get_vcpu_by_id(kvm, data->u.vcpu_attr.vcpu_id);
if (!v)
return -EINVAL;
-
/* No compat necessary here. */
BUILD_BUG_ON(sizeof(struct vcpu_info) !=
sizeof(struct compat_vcpu_info));
@@ -110,6 +229,22 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
break;
+ case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+ v = kvm_get_vcpu_by_id(kvm, data->u.vcpu_attr.vcpu_id);
+ if (!v)
+ return -EINVAL;
+
+ r = kvm_gfn_to_hva_cache_init(kvm, &v->arch.xen.runstate_cache,
+ data->u.vcpu_attr.gpa,
+ sizeof(struct vcpu_runstate_info));
+ if (r)
+ return r;
+
+ v->arch.xen.runstate_set = true;
+ v->arch.xen.current_runstate = RUNSTATE_blocked;
+ v->arch.xen.last_state_ns = ktime_get_ns();
+ break;
+
default:
break;
}
@@ -157,6 +292,17 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
}
break;
+ case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+ v = kvm_get_vcpu_by_id(kvm, data->u.vcpu_attr.vcpu_id);
+ if (!v)
+ return -EINVAL;
+
+ if (v->arch.xen.runstate_set) {
+ data->u.vcpu_attr.gpa = v->arch.xen.runstate_cache.gpa;
+ r = 0;
+ }
+ break;
+
default:
break;
}
@@ -9,6 +9,8 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
@@ -56,4 +58,10 @@ struct compat_shared_info {
struct compat_arch_shared_info arch;
};
+struct compat_vcpu_runstate_info { /* vcpu_runstate_info with 32-bit uint64_t alignment */
+ int state; /* same offset and size as in struct vcpu_runstate_info */
+ uint64_t state_entry_time; /* 4 bytes earlier than in the native x86-64 layout */
+ uint64_t time[4]; /* immediately follows state_entry_time */
+} __attribute__((packed)); /* packed: no padding after 'state', 0x2c bytes total */
+
#endif /* __ARCH_X86_KVM_XEN_H__ */
@@ -1606,6 +1606,7 @@ struct kvm_xen_hvm_attr {
#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
#define KVM_XEN_ATTR_TYPE_VCPU_INFO 0x2
#define KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO 0x3
+#define KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE 0x4
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {