mbox series

[00/15] KVM: Add Xen hypercall and shared info pages

Message ID 20201204011848.2967588-1-dwmw2@infradead.org (mailing list archive)
Headers show
Series KVM: Add Xen hypercall and shared info pages | expand

Message

David Woodhouse Dec. 4, 2020, 1:18 a.m. UTC
Reviving the first section (so far) of a patch set that Joao posted as 
RFC last year:

https://lore.kernel.org/kvm/20190220201609.28290-1-joao.m.martins@oracle.com/

This adds basic hypercall interception support, and adds support for
timekeeping and runstate-related shared info regions.

I've updated and reworked the original a bit, including:
 • Support for 32-bit guests
 • 64-bit second support in wallclock
 • Time counters for runnable/blocked states in runstate support
 • Self-tests
 • Fixed Viridian coexistence
 • No new KVM_CAP_XEN_xxx, just more bits returned by KVM_CAP_XEN_HVM

I'm about to do the event channel support, but this part stands alone
and should be sufficient to get a fully functional Xen HVM guest running.

David Woodhouse (6):
      KVM: Fix arguments to kvm_{un,}map_gfn()
      KVM: x86/xen: Fix coexistence of Xen and Hyper-V hypercalls
      KVM: x86/xen: latch long_mode when hypercall page is set up
      KVM: x86/xen: add definitions of compat_shared_info, compat_vcpu_info
      xen: add wc_sec_hi to struct shared_info
      KVM: x86: declare Xen HVM shared info capability and add test case

Joao Martins (9):
      KVM: x86/xen: fix Xen hypercall page msr handling
      KVM: x86/xen: intercept xen hypercalls if enabled
      KVM: x86/xen: add KVM_XEN_HVM_SET_ATTR/KVM_XEN_HVM_GET_ATTR
      KVM: x86/xen: register shared_info page
      KVM: x86/xen: setup pvclock updates
      KVM: x86/xen: update wallclock region
      KVM: x86/xen: register vcpu info
      KVM: x86/xen: register vcpu time info region
      KVM: x86/xen: register runstate info

 arch/x86/include/asm/kvm_host.h                    |  23 +
 arch/x86/include/asm/xen/interface.h               |   3 +
 arch/x86/kvm/Makefile                              |   2 +-
 arch/x86/kvm/hyperv.c                              |  40 +-
 arch/x86/kvm/trace.h                               |  36 ++
 arch/x86/kvm/x86.c                                 | 109 +++--
 arch/x86/kvm/x86.h                                 |   1 +
 arch/x86/kvm/xen.c                                 | 544 +++++++++++++++++++++
 arch/x86/kvm/xen.h                                 |  83 ++++
 include/linux/kvm_host.h                           |  34 +-
 include/uapi/linux/kvm.h                           |  47 ++
 include/xen/interface/xen.h                        |   4 +-
 tools/testing/selftests/kvm/Makefile               |   2 +
 tools/testing/selftests/kvm/lib/kvm_util.c         |   1 +
 .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 187 +++++++
 .../testing/selftests/kvm/x86_64/xen_vmcall_test.c | 150 ++++++
 virt/kvm/kvm_main.c                                |   8 +-
 17 files changed, 1201 insertions(+), 73 deletions(-)

32-bit guests, and fleshed out the runstate information a little more
to add times for all of running, runnable, blocked states.

Comments

David Woodhouse Dec. 5, 2020, 10:48 a.m. UTC | #1
On Fri, 2020-12-04 at 01:18 +0000, David Woodhouse wrote:
> Reviving the first section (so far) of a patch set that Joao posted as 
> RFC last year:
> 
> https://lore.kernel.org/kvm/20190220201609.28290-1-joao.m.martins@oracle.com/
> 
> This adds basic hypercall interception support, and adds support for
> timekeeping and runstate-related shared info regions.
> 
> I've updated and reworked the original a bit, including:
>  • Support for 32-bit guests
>  • 64-bit second support in wallclock
>  • Time counters for runnable/blocked states in runstate support
>  • Self-tests
>  • Fixed Viridian coexistence
>  • No new KVM_CAP_XEN_xxx, just more bits returned by KVM_CAP_XEN_HVM
> 
> I'm about to do the event channel support, but this part stands alone
> and should be sufficient to get a fully functional Xen HVM guest running.

I also realise I forgot to include the RCU read-critical sections
around using the pointers that might get invalidated. (This is the
model I think can we use for the KVM pvclock and other pages that we
currently pin too, FWIW.)

That looks a bit like this. Unless there's any other useful and
relevant feedback I'll post a V2 of the series over the weekend or on
Monday.

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index e49e59f93828..4aa776c1ad57 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -86,19 +86,21 @@ static void *xen_vcpu_info(struct kvm_vcpu *v)
 {
 	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(v);
 	struct kvm_xen *kvm = &v->kvm->arch.xen;
-	unsigned int offset = 0;
-	void *hva = NULL;
+	void *hva;
 
-	if (vcpu_xen->vcpu_info)
-		return vcpu_xen->vcpu_info;
+	hva = READ_ONCE(vcpu_xen->vcpu_info);
+	if (hva)
+		return hva;
 
-	if (kvm->shinfo && v->vcpu_id < MAX_VIRT_CPUS) {
-		hva = kvm->shinfo;
-		offset += offsetof(struct shared_info, vcpu_info);
-		offset += v->vcpu_id * sizeof(struct vcpu_info);
+	if (v->vcpu_id < MAX_VIRT_CPUS)
+		hva = READ_ONCE(kvm->shinfo);
+
+	if (hva) {
+		hva += offsetof(struct shared_info, vcpu_info);
+		hva += v->vcpu_id * sizeof(struct vcpu_info);
 	}
 
-	return hva + offset;
+	return hva;
 }
 
 static void kvm_xen_update_vcpu_time(struct kvm_vcpu *v,
@@ -139,6 +141,7 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state, u64 steal_
 	struct compat_vcpu_runstate_info *runstate;
 	u32 *runstate_state;
 	u64 now, delta;
+	int idx;
 
 	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
@@ -146,7 +149,8 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state, u64 steal_
 	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
 		     sizeof(((struct compat_vcpu_runstate_info *)0)->state));
 
-	runstate = vcpu_xen->runstate;
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	runstate = READ_ONCE(vcpu_xen->runstate);
 	runstate_state = &runstate->state;
 
 #ifdef CONFIG_64BIT
@@ -185,6 +189,8 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state, u64 steal_
 
 	runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
 	smp_wmb();
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
@@ -228,7 +234,9 @@ void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu)
 void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
 {
 	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(v);
-	struct vcpu_info *vcpu_info = xen_vcpu_info(v);
+	struct vcpu_info *vcpu_info;
+	struct pvclock_vcpu_time_info *pv_info;
+	int idx;
 
 	BUILD_BUG_ON(offsetof(struct shared_info, vcpu_info) != 0);
 	BUILD_BUG_ON(offsetof(struct compat_shared_info, vcpu_info) != 0);
@@ -236,12 +244,17 @@ void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
 	BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
 		     offsetof(struct compat_vcpu_info, time));
 
+	idx = srcu_read_lock(&v->kvm->srcu);
+	vcpu_info = xen_vcpu_info(v);
 	if (likely(vcpu_info))
 		kvm_xen_update_vcpu_time(v, &vcpu_info->time);
 
 	/* Update secondary pvclock region if registered */
-	if (vcpu_xen->pv_time)
-		kvm_xen_update_vcpu_time(v, vcpu_xen->pv_time);
+	pv_info = READ_ONCE(vcpu_xen->pv_time);
+	if (pv_info)
+		kvm_xen_update_vcpu_time(v, pv_info);
+
+	srcu_read_unlock(&v->kvm->srcu, idx);
 }
 
 static int vcpu_attr_loc(struct kvm_vcpu *vcpu, u16 type,