[RFC,02/39] KVM: x86/xen: intercept xen hypercalls if enabled

Message ID 20190220201609.28290-3-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Series x86/KVM: Xen HVM guest support

Commit Message

Joao Martins Feb. 20, 2019, 8:15 p.m. UTC
Add a new exit reason for the emulator to handle Xen hypercalls.
These are injected only if the guest has initialized the Xen
hypercall page. The hypercall page is just a convenience, but one
that is used by pretty much all guests, so if the guest sets the
hypercall page we assume a Xen guest is going to be set up.

The emulator will then panic with:

KVM: unknown exit reason 28
RAX=0000000000000011 RBX=ffffffff81e03e94 RCX=0000000040000000
RDX=0000000000000000
RSI=ffffffff81e03e70 RDI=0000000000000006 RBP=ffffffff81e03e90
RSP=ffffffff81e03e68
R8 =73726576206e6558 R9 =ffffffff81e03e90 R10=ffffffff81e03e94
R11=2e362e34206e6f69
R12=0000000040000004 R13=ffffffff81e03e8c R14=ffffffff81e03e88
R15=0000000000000000
RIP=ffffffff81001228 RFL=00000082 [--S----] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0000 0000000000000000 ffffffff 00c00000
CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
SS =0000 0000000000000000 ffffffff 00c00000
DS =0000 0000000000000000 ffffffff 00c00000
FS =0000 0000000000000000 ffffffff 00c00000
GS =0000 ffffffff81f34000 ffffffff 00c00000
LDT=0000 0000000000000000 ffffffff 00c00000
TR =0020 0000000000000000 00000fff 00808b00 DPL=0 TSS64-busy
GDT=     ffffffff81f3c000 0000007f
IDT=     ffffffff83265000 00000fff
CR0=80050033 CR2=ffff880001fa6ff8 CR3=0000000001fa6000 CR4=000406a0
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
DR3=0000000000000000
DR6=00000000ffff0ff0 DR7=0000000000000400
EFER=0000000000000d01
Code=cc cc cc cc cc cc cc cc cc cc cc cc b8 11 00 00 00 0f 01 c1 <c3> cc
cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc b8 12
00 00 00 0f

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 arch/x86/include/asm/kvm_host.h | 13 +++++++
 arch/x86/kvm/Makefile           |  2 +-
 arch/x86/kvm/trace.h            | 33 +++++++++++++++++
 arch/x86/kvm/x86.c              | 12 +++++++
 arch/x86/kvm/xen.c              | 79 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/xen.h              | 10 ++++++
 include/uapi/linux/kvm.h        | 17 ++++++++-
 7 files changed, 164 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kvm/xen.c
 create mode 100644 arch/x86/kvm/xen.h

Comments

Sean Christopherson Feb. 21, 2019, 6:29 p.m. UTC | #1
On Wed, Feb 20, 2019 at 08:15:32PM +0000, Joao Martins wrote:
> Add a new exit reason for emulator to handle Xen hypercalls.
> Albeit these are injected only if guest has initialized the Xen
> hypercall page - the hypercall is just a convenience but one
> that is done by pretty much all guests. Hence if the guest
> sets the hypercall page, we assume a Xen guest is going to
> be set up.
> 
> Emulator will then panic with:
> 
> KVM: unknown exit reason 28
> RAX=0000000000000011 RBX=ffffffff81e03e94 RCX=0000000040000000
> RDX=0000000000000000
> RSI=ffffffff81e03e70 RDI=0000000000000006 RBP=ffffffff81e03e90
> RSP=ffffffff81e03e68
> R8 =73726576206e6558 R9 =ffffffff81e03e90 R10=ffffffff81e03e94
> R11=2e362e34206e6f69
> R12=0000000040000004 R13=ffffffff81e03e8c R14=ffffffff81e03e88
> R15=0000000000000000
> RIP=ffffffff81001228 RFL=00000082 [--S----] CPL=0 II=0 A20=1 SMM=0 HLT=0
> ES =0000 0000000000000000 ffffffff 00c00000
> CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
> SS =0000 0000000000000000 ffffffff 00c00000
> DS =0000 0000000000000000 ffffffff 00c00000
> FS =0000 0000000000000000 ffffffff 00c00000
> GS =0000 ffffffff81f34000 ffffffff 00c00000
> LDT=0000 0000000000000000 ffffffff 00c00000
> TR =0020 0000000000000000 00000fff 00808b00 DPL=0 TSS64-busy
> GDT=     ffffffff81f3c000 0000007f
> IDT=     ffffffff83265000 00000fff
> CR0=80050033 CR2=ffff880001fa6ff8 CR3=0000000001fa6000 CR4=000406a0
> DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
> DR3=0000000000000000
> DR6=00000000ffff0ff0 DR7=0000000000000400
> EFER=0000000000000d01
> Code=cc cc cc cc cc cc cc cc cc cc cc cc b8 11 00 00 00 0f 01 c1 <c3> cc
> cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc b8 12
> 00 00 00 0f
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 13 +++++++
>  arch/x86/kvm/Makefile           |  2 +-
>  arch/x86/kvm/trace.h            | 33 +++++++++++++++++
>  arch/x86/kvm/x86.c              | 12 +++++++
>  arch/x86/kvm/xen.c              | 79 +++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/xen.h              | 10 ++++++
>  include/uapi/linux/kvm.h        | 17 ++++++++-
>  7 files changed, 164 insertions(+), 2 deletions(-)
>  create mode 100644 arch/x86/kvm/xen.c
>  create mode 100644 arch/x86/kvm/xen.h

...

> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 31ecf7a76d5a..2b46c93c9380 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -10,7 +10,7 @@ kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
>  
>  kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> -			   hyperv.o page_track.o debugfs.o
> +			   hyperv.o xen.o page_track.o debugfs.o

Can this be wrapped in a config?  Or even better, as a loadable module?
2k+ lines of code is a non-trivial amount of baggage for folks that don't
care about running Xen guests.  I've only glanced through the series, so
I've no idea if the resulting code would be an abomination.

>  
>  kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
>  kvm-amd-y		+= svm.o pmu_amd.o
Joao Martins Feb. 21, 2019, 8:56 p.m. UTC | #2
On 2/21/19 6:29 PM, Sean Christopherson wrote:
> On Wed, Feb 20, 2019 at 08:15:32PM +0000, Joao Martins wrote:
>> Add a new exit reason for emulator to handle Xen hypercalls.
>> Albeit these are injected only if guest has initialized the Xen
>> hypercall page - the hypercall is just a convenience but one
>> that is done by pretty much all guests. Hence if the guest
>> sets the hypercall page, we assume a Xen guest is going to
>> be set up.
>>
>> Emulator will then panic with:
>>
>> KVM: unknown exit reason 28
>> RAX=0000000000000011 RBX=ffffffff81e03e94 RCX=0000000040000000
>> RDX=0000000000000000
>> RSI=ffffffff81e03e70 RDI=0000000000000006 RBP=ffffffff81e03e90
>> RSP=ffffffff81e03e68
>> R8 =73726576206e6558 R9 =ffffffff81e03e90 R10=ffffffff81e03e94
>> R11=2e362e34206e6f69
>> R12=0000000040000004 R13=ffffffff81e03e8c R14=ffffffff81e03e88
>> R15=0000000000000000
>> RIP=ffffffff81001228 RFL=00000082 [--S----] CPL=0 II=0 A20=1 SMM=0 HLT=0
>> ES =0000 0000000000000000 ffffffff 00c00000
>> CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
>> SS =0000 0000000000000000 ffffffff 00c00000
>> DS =0000 0000000000000000 ffffffff 00c00000
>> FS =0000 0000000000000000 ffffffff 00c00000
>> GS =0000 ffffffff81f34000 ffffffff 00c00000
>> LDT=0000 0000000000000000 ffffffff 00c00000
>> TR =0020 0000000000000000 00000fff 00808b00 DPL=0 TSS64-busy
>> GDT=     ffffffff81f3c000 0000007f
>> IDT=     ffffffff83265000 00000fff
>> CR0=80050033 CR2=ffff880001fa6ff8 CR3=0000000001fa6000 CR4=000406a0
>> DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
>> DR3=0000000000000000
>> DR6=00000000ffff0ff0 DR7=0000000000000400
>> EFER=0000000000000d01
>> Code=cc cc cc cc cc cc cc cc cc cc cc cc b8 11 00 00 00 0f 01 c1 <c3> cc
>> cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc b8 12
>> 00 00 00 0f
>>
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h | 13 +++++++
>>  arch/x86/kvm/Makefile           |  2 +-
>>  arch/x86/kvm/trace.h            | 33 +++++++++++++++++
>>  arch/x86/kvm/x86.c              | 12 +++++++
>>  arch/x86/kvm/xen.c              | 79 +++++++++++++++++++++++++++++++++++++++++
>>  arch/x86/kvm/xen.h              | 10 ++++++
>>  include/uapi/linux/kvm.h        | 17 ++++++++-
>>  7 files changed, 164 insertions(+), 2 deletions(-)
>>  create mode 100644 arch/x86/kvm/xen.c
>>  create mode 100644 arch/x86/kvm/xen.h
> 
> ...
> 
>> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
>> index 31ecf7a76d5a..2b46c93c9380 100644
>> --- a/arch/x86/kvm/Makefile
>> +++ b/arch/x86/kvm/Makefile
>> @@ -10,7 +10,7 @@ kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
>>  
>>  kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
>>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
>> -			   hyperv.o page_track.o debugfs.o
>> +			   hyperv.o xen.o page_track.o debugfs.o
> 
> Can this be wrapped in a config?  Or even better, as a loadable module?

Turning that into a loadable module might be a little trickier, but I think it
is doable if that's what folks/maintainers would prefer.

The Xen addition follows the same structure as Hyper-V in KVM (what you suggest
here is probably applicable to both), i.e. there are some Xen-specific routines
for vm/vcpu init/teardown and timer handling. We would have to place some of
those functions into a struct that gets registered at modinit, roughly as in
the sketch below.
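
Purely as an illustration of that shape (the struct and callback names here
are made up for this mail, not taken from the series), something like:

	/* Hypothetical sketch only: ops a kvm_xen module could register. */
	struct kvm_xen_ops {
		int  (*vm_init)(struct kvm *kvm);
		void (*vm_destroy)(struct kvm *kvm);
		int  (*vcpu_init)(struct kvm_vcpu *vcpu);
		void (*vcpu_destroy)(struct kvm_vcpu *vcpu);
		int  (*hypercall)(struct kvm_vcpu *vcpu);   /* KVM_EXIT_XEN path */
		void (*stop_timer)(struct kvm_vcpu *vcpu);  /* Xen timer handling */
	};

	/*
	 * kvm.ko would call through this pointer; the xen module would fill
	 * it in from module_init() and clear it again on module_exit().
	 */
	extern struct kvm_xen_ops *kvm_xen_ops;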

> 2k+ lines of code is a non-trivial amount of baggage for folks that don't
> care about running Xen guests.  I've only glanced through the series, so
> I've no idea if the resulting code would be an abomination.
> 
>>  
>>  kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
>>  kvm-amd-y		+= svm.o pmu_amd.o
Sean Christopherson Feb. 22, 2019, 12:30 a.m. UTC | #3
On Thu, Feb 21, 2019 at 08:56:06PM +0000, Joao Martins wrote:
> On 2/21/19 6:29 PM, Sean Christopherson wrote:
> > On Wed, Feb 20, 2019 at 08:15:32PM +0000, Joao Martins wrote:
> >> Add a new exit reason for emulator to handle Xen hypercalls.
> >> Albeit these are injected only if guest has initialized the Xen
> >> hypercall page - the hypercall is just a convenience but one
> >> that is done by pretty much all guests. Hence if the guest
> >> sets the hypercall page, we assume a Xen guest is going to
> >> be set up.
> >>
> >> Emulator will then panic with:
> >>
> >> KVM: unknown exit reason 28
> >> RAX=0000000000000011 RBX=ffffffff81e03e94 RCX=0000000040000000
> >> RDX=0000000000000000
> >> RSI=ffffffff81e03e70 RDI=0000000000000006 RBP=ffffffff81e03e90
> >> RSP=ffffffff81e03e68
> >> R8 =73726576206e6558 R9 =ffffffff81e03e90 R10=ffffffff81e03e94
> >> R11=2e362e34206e6f69
> >> R12=0000000040000004 R13=ffffffff81e03e8c R14=ffffffff81e03e88
> >> R15=0000000000000000
> >> RIP=ffffffff81001228 RFL=00000082 [--S----] CPL=0 II=0 A20=1 SMM=0 HLT=0
> >> ES =0000 0000000000000000 ffffffff 00c00000
> >> CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
> >> SS =0000 0000000000000000 ffffffff 00c00000
> >> DS =0000 0000000000000000 ffffffff 00c00000
> >> FS =0000 0000000000000000 ffffffff 00c00000
> >> GS =0000 ffffffff81f34000 ffffffff 00c00000
> >> LDT=0000 0000000000000000 ffffffff 00c00000
> >> TR =0020 0000000000000000 00000fff 00808b00 DPL=0 TSS64-busy
> >> GDT=     ffffffff81f3c000 0000007f
> >> IDT=     ffffffff83265000 00000fff
> >> CR0=80050033 CR2=ffff880001fa6ff8 CR3=0000000001fa6000 CR4=000406a0
> >> DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
> >> DR3=0000000000000000
> >> DR6=00000000ffff0ff0 DR7=0000000000000400
> >> EFER=0000000000000d01
> >> Code=cc cc cc cc cc cc cc cc cc cc cc cc b8 11 00 00 00 0f 01 c1 <c3> cc
> >> cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc b8 12
> >> 00 00 00 0f
> >>
> >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h | 13 +++++++
> >>  arch/x86/kvm/Makefile           |  2 +-
> >>  arch/x86/kvm/trace.h            | 33 +++++++++++++++++
> >>  arch/x86/kvm/x86.c              | 12 +++++++
> >>  arch/x86/kvm/xen.c              | 79 +++++++++++++++++++++++++++++++++++++++++
> >>  arch/x86/kvm/xen.h              | 10 ++++++
> >>  include/uapi/linux/kvm.h        | 17 ++++++++-
> >>  7 files changed, 164 insertions(+), 2 deletions(-)
> >>  create mode 100644 arch/x86/kvm/xen.c
> >>  create mode 100644 arch/x86/kvm/xen.h
> > 
> > ...
> > 
> >> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> >> index 31ecf7a76d5a..2b46c93c9380 100644
> >> --- a/arch/x86/kvm/Makefile
> >> +++ b/arch/x86/kvm/Makefile
> >> @@ -10,7 +10,7 @@ kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
> >>  
> >>  kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> >>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> >> -			   hyperv.o page_track.o debugfs.o
> >> +			   hyperv.o xen.o page_track.o debugfs.o
> > 
> > Can this be wrapped in a config?  Or even better, as a loadable module?
> 
> Turning that into a loadable module might be a little trickier, but I think it
> is doable if that's what folks/maintainers would prefer.
> 
> The Xen addition follows the same structure as Hyper-V in kvm (what you suggest
> here is probably applicable for both). i.e. there's some Xen specific routines
> for vm/vcpu init/teardown, and timer handling. We would have to place some of
> those functions into a struct that gets registered at modinit.

Huh.  I never really looked at the Hyper-V code, for some reason I always
assumed it was only related to running KVM on Hyper-V.  I agree that this
extra hurdle only makes sense if it's also applied to Hyper-V.
Paolo Bonzini Feb. 22, 2019, 12:50 p.m. UTC | #4
On 22/02/19 01:30, Sean Christopherson wrote:
> On Thu, Feb 21, 2019 at 08:56:06PM +0000, Joao Martins wrote:
>> The Xen addition follows the same structure as Hyper-V in kvm (what you suggest
>> here is probably applicable for both). i.e. there's some Xen specific routines
>> for vm/vcpu init/teardown, and timer handling. We would have to place some of
>> those functions into a struct that gets registered at modinit.
> 
> Huh.  I never really looked at the Hyper-V code, for some reason I always
> assumed it was only related to running KVM on Hyper-V.  I agree that this
> extra hurdle only makes sense if it's also applied to Hyper-V.

The difference is that Hyper-V support is more or less mandatory to run
recent Windows guests.  Having a config symbol would be enough for me.
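
Something along these lines, say (just a sketch - the symbol name and help
text are illustrative, not from a posted patch):

	config KVM_XEN
		bool "Support for Xen HVM guest interface emulation"
		depends on KVM
		help
		  Intercept Xen hypercalls (and, with the later patches in
		  this series, shared info pages and event channels) so that
		  KVM can host Xen HVM guests.

with the Makefile line then becoming:

	kvm-$(CONFIG_KVM_XEN)	+= xen.o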

Paolo
David Woodhouse Dec. 1, 2020, 9:48 a.m. UTC | #5
On Wed, 2019-02-20 at 20:15 +0000, Joao Martins wrote:
> Add a new exit reason for emulator to handle Xen hypercalls.
> Albeit these are injected only if guest has initialized the Xen
> hypercall page 

I've reworked this a little.

I didn't like the inconsistency of allowing userspace to provide the
hypercall pages even though the ABI is now defined by the kernel and it
*has* to be VMCALL/VMMCALL.

So I switched it to generate the hypercall page directly from the
kernel, just like we do for the Hyper-V hypercall page.

I introduced a new flag in the xen_hvm_config struct to enable this
behaviour, and advertised it in the KVM_CAP_XEN_HVM return value.

I also added the cpl and support for 6-argument hypercalls, and made it
check the guest RIP when completing the call as discussed (although I
still think that probably ought to be a generic thing).

I adjusted the test case from my version of the patch, and added
support for actually testing the hypercall page MSR.

https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/xenpv

I'll go through and rebase your patch series at least up to patch 16
and collect them in that tree, then probably post them for real once
I've got everything working locally.


=======================
From c037c329c8867b219afe2100e383c62e9db7b06d Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Wed, 13 Jun 2018 09:55:44 -0400
Subject: [PATCH] KVM: x86/xen: intercept xen hypercalls if enabled

Add a new exit reason for the emulator to handle Xen hypercalls.

Since this means KVM owns the ABI, dispense with the facility for the
VMM to provide its own copy of the hypercall pages; just fill them in
directly using VMCALL/VMMCALL as we do for the Hyper-V hypercall page.

This behaviour is enabled by a new INTERCEPT_HCALL flag in the
KVM_XEN_HVM_CONFIG ioctl structure, and advertised by the same flag
being returned from the KVM_CAP_XEN_HVM check.

Add a test case and shift xen_hvm_config() to the nascent xen.c while
we're at it.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h               |   6 +
 arch/x86/kvm/Makefile                         |   2 +-
 arch/x86/kvm/trace.h                          |  36 +++++
 arch/x86/kvm/x86.c                            |  46 +++---
 arch/x86/kvm/xen.c                            | 140 ++++++++++++++++++
 arch/x86/kvm/xen.h                            |  21 +++
 include/uapi/linux/kvm.h                      |  19 +++
 tools/testing/selftests/kvm/Makefile          |   1 +
 tools/testing/selftests/kvm/lib/kvm_util.c    |   1 +
 .../selftests/kvm/x86_64/xen_vmcall_test.c    | 123 +++++++++++++++
 10 files changed, 365 insertions(+), 30 deletions(-)
 create mode 100644 arch/x86/kvm/xen.c
 create mode 100644 arch/x86/kvm/xen.h
 create mode 100644 tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e5f33a0d0e2..9de3229e91e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -520,6 +520,11 @@ struct kvm_vcpu_hv {
 	cpumask_t tlb_flush;
 };
 
+/* Xen HVM per vcpu emulation context */
+struct kvm_vcpu_xen {
+	u64 hypercall_rip;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -717,6 +722,7 @@ struct kvm_vcpu_arch {
 	unsigned long singlestep_rip;
 
 	struct kvm_vcpu_hv hyperv;
+	struct kvm_vcpu_xen xen;
 
 	cpumask_var_t wbinvd_dirty_mask;
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b804444e16d4..8bee4afc1fec 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -13,7 +13,7 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
-kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
+kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o xen.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
 			   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
 			   mmu/spte.o mmu/tdp_iter.o mmu/tdp_mmu.o
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index aef960f90f26..d28ecb37b62c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -92,6 +92,42 @@ TRACE_EVENT(kvm_hv_hypercall,
 		  __entry->outgpa)
 );
 
+/*
+ * Tracepoint for Xen hypercall.
+ */
+TRACE_EVENT(kvm_xen_hypercall,
+	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
+		 unsigned long a2, unsigned long a3, unsigned long a4,
+		 unsigned long a5),
+	    TP_ARGS(nr, a0, a1, a2, a3, a4, a5),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, nr)
+		__field(unsigned long, a0)
+		__field(unsigned long, a1)
+		__field(unsigned long, a2)
+		__field(unsigned long, a3)
+		__field(unsigned long, a4)
+		__field(unsigned long, a5)
+	),
+
+	TP_fast_assign(
+		__entry->nr = nr;
+		__entry->a0 = a0;
+		__entry->a1 = a1;
+		__entry->a2 = a2;
+		__entry->a3 = a3;
+		__entry->a4 = a4;
+		__entry->a5 = a5;
+	),
+
+	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 0x%lx",
+		  __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
+		  __entry->a3, __entry->a4, __entry->a5)
+);
+
+
+
 /*
  * Tracepoint for PIO.
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0836023963ec..adf04e8cc64a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
 #include "pmu.h"
 #include "hyperv.h"
 #include "lapic.h"
+#include "xen.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -2842,32 +2843,6 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 0;
 }
 
-static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
-{
-	struct kvm *kvm = vcpu->kvm;
-	int lm = is_long_mode(vcpu);
-	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
-		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
-	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
-		: kvm->arch.xen_hvm_config.blob_size_32;
-	u32 page_num = data & ~PAGE_MASK;
-	u64 page_addr = data & PAGE_MASK;
-	u8 *page;
-
-	if (page_num >= blob_size)
-		return 1;
-
-	page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
-	if (IS_ERR(page))
-		return PTR_ERR(page);
-
-	if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
-		kfree(page);
-		return 1;
-	}
-	return 0;
-}
-
 static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
 {
 	u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
@@ -3002,7 +2977,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u64 data = msr_info->data;
 
 	if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
-		return xen_hvm_config(vcpu, data);
+		return kvm_xen_hvm_config(vcpu, data);
 
 	switch (msr) {
 	case MSR_AMD64_NB_CFG:
@@ -3703,7 +3678,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_PIT2:
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
-	case KVM_CAP_XEN_HVM:
 	case KVM_CAP_VCPU_EVENTS:
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:
@@ -3742,6 +3716,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
 		r = 1;
 		break;
+	case KVM_CAP_XEN_HVM:
+		r = 1 | KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;
+		break;
 	case KVM_CAP_SYNC_REGS:
 		r = KVM_SYNC_X86_VALID_FIELDS;
 		break;
@@ -5603,7 +5580,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&xhc, argp, sizeof(xhc)))
 			goto out;
 		r = -EINVAL;
-		if (xhc.flags)
+		if (xhc.flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
+			goto out;
+		/*
+		 * With hypercall interception the kernel generates its own
+		 * hypercall page so it must not be provided.
+		 */
+		if ((xhc.flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
+		    (xhc.blob_addr_32 || xhc.blob_addr_64 ||
+		     xhc.blob_size_32 || xhc.blob_size_64))
 			goto out;
 		memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
 		r = 0;
@@ -8066,6 +8051,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int op_64_bit;
 
+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
+		return kvm_xen_hypercall(vcpu);
+
 	if (kvm_hv_hypercall_enabled(vcpu->kvm))
 		return kvm_hv_hypercall(vcpu);
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
new file mode 100644
index 000000000000..6400a4bc8480
--- /dev/null
+++ b/arch/x86/kvm/xen.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * KVM Xen emulation
+ */
+
+#include "x86.h"
+#include "xen.h"
+
+#include <linux/kvm_host.h>
+
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+int kvm_xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+	struct kvm *kvm = vcpu->kvm;
+	u32 page_num = data & ~PAGE_MASK;
+	u64 page_addr = data & PAGE_MASK;
+
+	/*
+	 * If Xen hypercall intercept is enabled, fill the hypercall
+	 * page with VMCALL/VMMCALL instructions since that's what
+	 * we catch. Else the VMM has provided the hypercall pages
+	 * with instructions of its own choosing, so use those.
+	 */
+	if (kvm_xen_hypercall_enabled(kvm)) {
+		u8 instructions[32];
+		int i;
+
+		if (page_num)
+			return 1;
+
+		/* mov imm32, %eax */
+		instructions[0] = 0xb8;
+
+		/* vmcall / vmmcall */
+		kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);
+
+		/* ret */
+		instructions[8] = 0xc3;
+
+		/* int3 to pad */
+		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);
+
+		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
+			*(u32 *)&instructions[1] = i;
+			if (kvm_vcpu_write_guest(vcpu,
+						 page_addr + (i * sizeof(instructions)),
+						 instructions, sizeof(instructions)))
+				return 1;
+		}
+	} else {
+		int lm = is_long_mode(vcpu);
+		u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
+				   : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
+		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
+				  : kvm->arch.xen_hvm_config.blob_size_32;
+		u8 *page;
+
+		if (page_num >= blob_size)
+			return 1;
+
+		page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
+			kfree(page);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
+{
+	kvm_rax_write(vcpu, result);
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
+static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+
+	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
+		return 1;
+
+	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
+}
+
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
+{
+	bool longmode;
+	u64 input, params[6];
+
+	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
+
+	longmode = is_64_bit_mode(vcpu);
+	if (!longmode) {
+		params[0] = (u32)kvm_rbx_read(vcpu);
+		params[1] = (u32)kvm_rcx_read(vcpu);
+		params[2] = (u32)kvm_rdx_read(vcpu);
+		params[3] = (u32)kvm_rsi_read(vcpu);
+		params[4] = (u32)kvm_rdi_read(vcpu);
+		params[5] = (u32)kvm_rbp_read(vcpu);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		params[0] = (u64)kvm_rdi_read(vcpu);
+		params[1] = (u64)kvm_rsi_read(vcpu);
+		params[2] = (u64)kvm_rdx_read(vcpu);
+		params[3] = (u64)kvm_r10_read(vcpu);
+		params[4] = (u64)kvm_r8_read(vcpu);
+		params[5] = (u64)kvm_r9_read(vcpu);
+	}
+#endif
+	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
+				params[3], params[4], params[5]);
+
+	vcpu->run->exit_reason = KVM_EXIT_XEN;
+	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
+	vcpu->run->xen.u.hcall.longmode = longmode;
+	vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
+	vcpu->run->xen.u.hcall.input = input;
+	vcpu->run->xen.u.hcall.params[0] = params[0];
+	vcpu->run->xen.u.hcall.params[1] = params[1];
+	vcpu->run->xen.u.hcall.params[2] = params[2];
+	vcpu->run->xen.u.hcall.params[3] = params[3];
+	vcpu->run->xen.u.hcall.params[4] = params[4];
+	vcpu->run->xen.u.hcall.params[5] = params[5];
+	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
+	vcpu->arch.complete_userspace_io =
+		kvm_xen_hypercall_complete_userspace;
+
+	return 0;
+}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
new file mode 100644
index 000000000000..81e12f716d2e
--- /dev/null
+++ b/arch/x86/kvm/xen.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * KVM Xen emulation
+ */
+
+#ifndef __ARCH_X86_KVM_XEN_H__
+#define __ARCH_X86_KVM_XEN_H__
+
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
+int kvm_xen_hvm_config(struct kvm_vcpu *vcpu, u64 data);
+
+static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
+{
+	return kvm->arch.xen_hvm_config.flags &
+		KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;
+}
+
+#endif /* __ARCH_X86_KVM_XEN_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ca41220b40b8..00221fe56994 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -216,6 +216,20 @@ struct kvm_hyperv_exit {
 	} u;
 };
 
+struct kvm_xen_exit {
+#define KVM_EXIT_XEN_HCALL          1
+	__u32 type;
+	union {
+		struct {
+			__u32 longmode;
+			__u32 cpl;
+			__u64 input;
+			__u64 result;
+			__u64 params[6];
+		} hcall;
+	} u;
+};
+
 #define KVM_S390_GET_SKEYS_NONE   1
 #define KVM_S390_SKEYS_MAX        1048576
 
@@ -250,6 +264,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV         28
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
+#define KVM_EXIT_XEN              31
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -426,6 +441,8 @@ struct kvm_run {
 			__u32 index; /* kernel -> user */
 			__u64 data; /* kernel <-> user */
 		} msr;
+		/* KVM_EXIT_XEN */
+		struct kvm_xen_exit xen;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1126,6 +1143,8 @@ struct kvm_x86_mce {
 #endif
 
 #ifdef KVM_CAP_XEN_HVM
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
+
 struct kvm_xen_hvm_config {
 	__u32 flags;
 	__u32 msr;
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 3d14ef77755e..d94abec627e6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 126c6727a6b0..6e96ae47d28c 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1654,6 +1654,7 @@ static struct exit_reason {
 	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
 	{KVM_EXIT_OSI, "OSI"},
 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+	{KVM_EXIT_XEN, "XEN"},
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
 #endif
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
new file mode 100644
index 000000000000..3f1dd93626e5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID		5
+
+#define HCALL_REGION_GPA	0xc0000000ULL
+#define HCALL_REGION_SLOT	10
+
+static struct kvm_vm *vm;
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR 0x40000000
+
+static void guest_code(void)
+{
+	unsigned long rax = INPUTVALUE;
+	unsigned long rdi = ARGVALUE(1);
+	unsigned long rsi = ARGVALUE(2);
+	unsigned long rdx = ARGVALUE(3);
+	register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+	register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+	register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+	/* First a direct invocation of 'vmcall' */
+	__asm__ __volatile__("vmcall" :
+			     "=a"(rax) :
+			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+			     "r"(r10), "r"(r8), "r"(r9));
+	GUEST_ASSERT(rax == RETVALUE);
+
+	/* Now fill in the hypercall page */
+	__asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+			     "a" (HCALL_REGION_GPA & 0xffffffff),
+			     "d" (HCALL_REGION_GPA >> 32));
+
+	/* And invoke the same hypercall that way */
+	__asm__ __volatile__("call *%1" : "=a"(rax) :
+			     "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+			     "r"(r10), "r"(r8), "r"(r9));
+	GUEST_ASSERT(rax == RETVALUE);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
+	      KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) {
+		print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	struct kvm_xen_hvm_config hvmc = {
+		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+		.msr = XEN_HYPERCALL_MSR,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+	/* Map a region for the hypercall page */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                    HCALL_REGION_GPA, HCALL_REGION_SLOT,
+				    getpagesize(), 0);
+	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 1, 0);
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_XEN) {
+			ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+			ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+			ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+			ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+			ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+			ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+			ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+			ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+			ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+			ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+			run->xen.u.hcall.result = RETVALUE;
+			continue;
+		}
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s", (const char *)uc.args[0]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+		}
+	}
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
David Woodhouse Dec. 1, 2020, 11:19 a.m. UTC | #6
On Tue, 2020-12-01 at 09:48 +0000, David Woodhouse wrote:
> So I switched it to generate the hypercall page directly from the
> kernel, just like we do for the Hyper-V hypercall page.

Speaking of Hyper-V... I'll post this one now so you can start heckling
it.

I won't post the rest as I go; not much of the rest of the series when
I eventually post it will be very new and exciting. It'll just be
rebasing and tweaking your originals and perhaps adding some tests.

=======================
From db020c521c3a96b698a78d4e62cdd19da3831585 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Tue, 1 Dec 2020 10:59:26 +0000
Subject: [PATCH] KVM: x86: Fix coexistence of Xen and Hyper-V hypercalls

Disambiguate Xen vs. Hyper-V calls by adding 'orl $0x80000000, %eax'
at the start of the Hyper-V hypercall page when Xen hypercalls are
also enabled.

That bit is reserved in the Hyper-V ABI, and those hypercall numbers
will never be used by Xen (because it does precisely the same trick).

Switch to using kvm_vcpu_write_guest() while we're at it, instead of
open-coding it.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/hyperv.c                         | 40 ++++++++++++++-----
 arch/x86/kvm/xen.c                            |  6 +++
 .../selftests/kvm/x86_64/xen_vmcall_test.c    | 39 +++++++++++++++---
 3 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5c7c4060b45c..347ff9ad70b3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -23,6 +23,7 @@
 #include "ioapic.h"
 #include "cpuid.h"
 #include "hyperv.h"
+#include "xen.h"
 
 #include <linux/cpu.h>
 #include <linux/kvm_host.h>
@@ -1139,9 +1140,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
 		break;
 	case HV_X64_MSR_HYPERCALL: {
-		u64 gfn;
-		unsigned long addr;
-		u8 instructions[4];
+		u8 instructions[9];
+		int i = 0;
+		u64 addr;
 
 		/* if guest os id is not set hypercall should remain disabled */
 		if (!hv->hv_guest_os_id)
@@ -1150,16 +1151,33 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			hv->hv_hypercall = data;
 			break;
 		}
-		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
-		addr = gfn_to_hva(kvm, gfn);
-		if (kvm_is_error_hva(addr))
-			return 1;
-		kvm_x86_ops.patch_hypercall(vcpu, instructions);
-		((unsigned char *)instructions)[3] = 0xc3; /* ret */
-		if (__copy_to_user((void __user *)addr, instructions, 4))
+
+		/*
+		 * If Xen and Hyper-V hypercalls are both enabled, disambiguate
+		 * the same way Xen itself does, by setting the bit 31 of EAX
+		 * which is RsvdZ in the 32-bit Hyper-V hypercall ABI and just
+		 * going to be clobbered on 64-bit.
+		 */
+		if (kvm_xen_hypercall_enabled(kvm)) {
+			/* orl $0x80000000, %eax */
+			instructions[i++] = 0x0d;
+			instructions[i++] = 0x00;
+			instructions[i++] = 0x00;
+			instructions[i++] = 0x00;
+			instructions[i++] = 0x80;
+		}
+
+		/* vmcall/vmmcall */
+		kvm_x86_ops.patch_hypercall(vcpu, instructions + i);
+		i += 3;
+
+		/* ret */
+		((unsigned char *)instructions)[i++] = 0xc3;
+
+		addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
+		if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
 			return 1;
 		hv->hv_hypercall = data;
-		mark_page_dirty(kvm, gfn);
 		break;
 	}
 	case HV_X64_MSR_REFERENCE_TSC:
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 6400a4bc8480..c3426213a970 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -8,6 +8,7 @@
 
 #include "x86.h"
 #include "xen.h"
+#include "hyperv.h"
 
 #include <linux/kvm_host.h>
 
@@ -99,6 +100,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 
 	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
 
+	/* Hyper-V hypercalls get bit 31 set in EAX */
+	if ((input & 0x80000000) &&
+	    kvm_hv_hypercall_enabled(vcpu->kvm))
+		return kvm_hv_hypercall(vcpu);
+
 	longmode = is_64_bit_mode(vcpu);
 	if (!longmode) {
 		params[0] = (u32)kvm_rbx_read(vcpu);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index 3f1dd93626e5..24f279e1a66b 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -15,6 +15,7 @@
 
 #define HCALL_REGION_GPA	0xc0000000ULL
 #define HCALL_REGION_SLOT	10
+#define PAGE_SIZE		4096
 
 static struct kvm_vm *vm;
 
@@ -22,7 +23,12 @@ static struct kvm_vm *vm;
 #define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
 #define RETVALUE 0xcafef00dfbfbffffUL
 
-#define XEN_HYPERCALL_MSR 0x40000000
+#define XEN_HYPERCALL_MSR	0x40000200
+#define HV_GUEST_OS_ID_MSR	0x40000000
+#define HV_HYPERCALL_MSR	0x40000001
+
+#define HVCALL_SIGNAL_EVENT		0x005d
+#define HV_STATUS_INVALID_ALIGNMENT	4
 
 static void guest_code(void)
 {
@@ -30,6 +36,7 @@ static void guest_code(void)
 	unsigned long rdi = ARGVALUE(1);
 	unsigned long rsi = ARGVALUE(2);
 	unsigned long rdx = ARGVALUE(3);
+	unsigned long rcx;
 	register unsigned long r10 __asm__("r10") = ARGVALUE(4);
 	register unsigned long r8 __asm__("r8") = ARGVALUE(5);
 	register unsigned long r9 __asm__("r9") = ARGVALUE(6);
@@ -41,18 +48,38 @@ static void guest_code(void)
 			     "r"(r10), "r"(r8), "r"(r9));
 	GUEST_ASSERT(rax == RETVALUE);
 
-	/* Now fill in the hypercall page */
+	/* Fill in the Xen hypercall page */
 	__asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
 			     "a" (HCALL_REGION_GPA & 0xffffffff),
 			     "d" (HCALL_REGION_GPA >> 32));
 
-	/* And invoke the same hypercall that way */
+	/* Set Hyper-V Guest OS ID */
+	__asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+			     "a" (0x5a), "d" (0));
+
+	/* Hyper-V hypercall page */
+	u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+	__asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+			     "a" (msrval & 0xffffffff),
+			     "d" (msrval >> 32));
+
+	/* Invoke a Xen hypercall */
 	__asm__ __volatile__("call *%1" : "=a"(rax) :
 			     "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
 			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
 			     "r"(r10), "r"(r8), "r"(r9));
 	GUEST_ASSERT(rax == RETVALUE);
 
+	/* Invoke a Hyper-V hypercall */
+	rax = 0;
+	rcx = HVCALL_SIGNAL_EVENT;	/* code */
+	rdx = 0x5a5a5a5a;		/* ingpa (badly aligned) */
+	__asm__ __volatile__("call *%1" : "=a"(rax) :
+			     "r"(HCALL_REGION_GPA + PAGE_SIZE),
+			     "a"(rax), "c"(rcx), "d"(rdx),
+			     "r"(r8));
+	GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
 	GUEST_DONE();
 }
 
@@ -73,11 +100,11 @@ int main(int argc, char *argv[])
 	};
 	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
 
-	/* Map a region for the hypercall page */
+	/* Map a region for the hypercall pages */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
                                     HCALL_REGION_GPA, HCALL_REGION_SLOT,
-				    getpagesize(), 0);
-	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 1, 0);
+				    2 * getpagesize(), 0);
+	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
 
 	for (;;) {
 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
Ankur Arora Dec. 2, 2020, 5:19 a.m. UTC | #7
On 2020-12-01 1:48 a.m., David Woodhouse wrote:
> On Wed, 2019-02-20 at 20:15 +0000, Joao Martins wrote:
>> Add a new exit reason for emulator to handle Xen hypercalls.
>> Albeit these are injected only if guest has initialized the Xen
>> hypercall page
> 
> I've reworked this a little.
> 
> I didn't like the inconsistency of allowing userspace to provide the
> hypercall pages even though the ABI is now defined by the kernel and it
> *has* to be VMCALL/VMMCALL.
> 
> So I switched it to generate the hypercall page directly from the
> kernel, just like we do for the Hyper-V hypercall page.
> 
> I introduced a new flag in the xen_hvm_config struct to enable this
> behaviour, and advertised it in the KVM_CAP_XEN_HVM return value.
> 
> I also added the cpl and support for 6-argument hypercalls, and made it
> check the guest RIP when completing the call as discussed (although I
> still think that probably ought to be a generic thing).
> 
> I adjusted the test case from my version of the patch, and added
> support for actually testing the hypercall page MSR.
> 
> https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/xenpv
> 
> I'll go through and rebase your patch series at least up to patch 16
> and collect them in that tree, then probably post them for real once
> I've got everything working locally.
> 
> 
> =======================
>  From c037c329c8867b219afe2100e383c62e9db7b06d Mon Sep 17 00:00:00 2001
> From: Joao Martins <joao.m.martins@oracle.com>
> Date: Wed, 13 Jun 2018 09:55:44 -0400
> Subject: [PATCH] KVM: x86/xen: intercept xen hypercalls if enabled
> 
> Add a new exit reason for emulator to handle Xen hypercalls.
> 
> Since this means KVM owns the ABI, dispense with the facility for the
> VMM to provide its own copy of the hypercall pages; just fill them in
> directly using VMCALL/VMMCALL as we do for the Hyper-V hypercall page.
> 
> This behaviour is enabled by a new INTERCEPT_HCALL flag in the
> KVM_XEN_HVM_CONFIG ioctl structure, and advertised by the same flag
> being returned from the KVM_CAP_XEN_HVM check.
> 
> Add a test case and shift xen_hvm_config() to the nascent xen.c while
> we're at it.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>   arch/x86/include/asm/kvm_host.h               |   6 +
>   arch/x86/kvm/Makefile                         |   2 +-
>   arch/x86/kvm/trace.h                          |  36 +++++
>   arch/x86/kvm/x86.c                            |  46 +++---
>   arch/x86/kvm/xen.c                            | 140 ++++++++++++++++++
>   arch/x86/kvm/xen.h                            |  21 +++
>   include/uapi/linux/kvm.h                      |  19 +++
>   tools/testing/selftests/kvm/Makefile          |   1 +
>   tools/testing/selftests/kvm/lib/kvm_util.c    |   1 +
>   .../selftests/kvm/x86_64/xen_vmcall_test.c    | 123 +++++++++++++++
>   10 files changed, 365 insertions(+), 30 deletions(-)
>   create mode 100644 arch/x86/kvm/xen.c
>   create mode 100644 arch/x86/kvm/xen.h
>   create mode 100644 tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
> 

> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> new file mode 100644
> index 000000000000..6400a4bc8480
> --- /dev/null
> +++ b/arch/x86/kvm/xen.c
> @@ -0,0 +1,140 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
> + * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + *
> + * KVM Xen emulation
> + */
> +
> +#include "x86.h"
> +#include "xen.h"
> +
> +#include <linux/kvm_host.h>
> +
> +#include <trace/events/kvm.h>
> +
> +#include "trace.h"
> +
> +int kvm_xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	u32 page_num = data & ~PAGE_MASK;
> +	u64 page_addr = data & PAGE_MASK;
> +
> +	/*
> +	 * If Xen hypercall intercept is enabled, fill the hypercall
> +	 * page with VMCALL/VMMCALL instructions since that's what
> +	 * we catch. Else the VMM has provided the hypercall pages
> +	 * with instructions of its own choosing, so use those.
> +	 */
> +	if (kvm_xen_hypercall_enabled(kvm)) {
> +		u8 instructions[32];
> +		int i;
> +
> +		if (page_num)
> +			return 1;
> +
> +		/* mov imm32, %eax */
> +		instructions[0] = 0xb8;
> +
> +		/* vmcall / vmmcall */
> +		kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);
> +
> +		/* ret */
> +		instructions[8] = 0xc3;
> +
> +		/* int3 to pad */
> +		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);
> +
> +		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
> +			*(u32 *)&instructions[1] = i;
> +			if (kvm_vcpu_write_guest(vcpu,
> +						 page_addr + (i * sizeof(instructions)),
> +						 instructions, sizeof(instructions)))
> +				return 1;
> +		}

HYPERVISOR_iret isn't supported on 64bit so should be ud2 instead.
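
i.e. something like the below in the fill loop - just a sketch, mirroring
what Xen's own HVM hypercall page setup does for that slot (__HYPERVISOR_iret
is hypercall number 23):

	for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
		if (i == 23) {
			/* __HYPERVISOR_iret: no 64-bit HVM implementation,
			 * so fill the slot with ud2 rather than a vmcall. */
			instructions[0] = 0x0f;
			instructions[1] = 0x0b;
			memset(instructions + 2, 0xcc, sizeof(instructions) - 2);
		} else {
			/* mov $i, %eax; vmcall/vmmcall; ret; int3 padding */
			instructions[0] = 0xb8;
			*(u32 *)&instructions[1] = i;
			kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);
			instructions[8] = 0xc3;
			memset(instructions + 9, 0xcc, sizeof(instructions) - 9);
		}
		if (kvm_vcpu_write_guest(vcpu,
					 page_addr + (i * sizeof(instructions)),
					 instructions, sizeof(instructions)))
			return 1;
	}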


Ankur

> +	} else {
> +		int lm = is_long_mode(vcpu);
> +		u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
> +				   : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
> +		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
> +				  : kvm->arch.xen_hvm_config.blob_size_32;
> +		u8 *page;
> +
> +		if (page_num >= blob_size)
> +			return 1;
> +
> +		page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
> +			kfree(page);
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
> +{
> +	kvm_rax_write(vcpu, result);
> +	return kvm_skip_emulated_instruction(vcpu);
> +}
> +
> +static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_run *run = vcpu->run;
> +
> +	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
> +		return 1;
> +
> +	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
> +}
> +
> +int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
> +{
> +	bool longmode;
> +	u64 input, params[6];
> +
> +	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
> +
> +	longmode = is_64_bit_mode(vcpu);
> +	if (!longmode) {
> +		params[0] = (u32)kvm_rbx_read(vcpu);
> +		params[1] = (u32)kvm_rcx_read(vcpu);
> +		params[2] = (u32)kvm_rdx_read(vcpu);
> +		params[3] = (u32)kvm_rsi_read(vcpu);
> +		params[4] = (u32)kvm_rdi_read(vcpu);
> +		params[5] = (u32)kvm_rbp_read(vcpu);
> +	}
> +#ifdef CONFIG_X86_64
> +	else {
> +		params[0] = (u64)kvm_rdi_read(vcpu);
> +		params[1] = (u64)kvm_rsi_read(vcpu);
> +		params[2] = (u64)kvm_rdx_read(vcpu);
> +		params[3] = (u64)kvm_r10_read(vcpu);
> +		params[4] = (u64)kvm_r8_read(vcpu);
> +		params[5] = (u64)kvm_r9_read(vcpu);
> +	}
> +#endif
> +	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
> +				params[3], params[4], params[5]);
> +
> +	vcpu->run->exit_reason = KVM_EXIT_XEN;
> +	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
> +	vcpu->run->xen.u.hcall.longmode = longmode;
> +	vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
> +	vcpu->run->xen.u.hcall.input = input;
> +	vcpu->run->xen.u.hcall.params[0] = params[0];
> +	vcpu->run->xen.u.hcall.params[1] = params[1];
> +	vcpu->run->xen.u.hcall.params[2] = params[2];
> +	vcpu->run->xen.u.hcall.params[3] = params[3];
> +	vcpu->run->xen.u.hcall.params[4] = params[4];
> +	vcpu->run->xen.u.hcall.params[5] = params[5];
> +	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
> +	vcpu->arch.complete_userspace_io =
> +		kvm_xen_hypercall_complete_userspace;
> +
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
> new file mode 100644
> index 000000000000..81e12f716d2e
> --- /dev/null
> +++ b/arch/x86/kvm/xen.h
> @@ -0,0 +1,21 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
> + * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + *
> + * KVM Xen emulation
> + */
> +
> +#ifndef __ARCH_X86_KVM_XEN_H__
> +#define __ARCH_X86_KVM_XEN_H__
> +
> +int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
> +int kvm_xen_hvm_config(struct kvm_vcpu *vcpu, u64 data);
> +
> +static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
> +{
> +	return kvm->arch.xen_hvm_config.flags &
> +		KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;
> +}
> +
> +#endif /* __ARCH_X86_KVM_XEN_H__ */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index ca41220b40b8..00221fe56994 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -216,6 +216,20 @@ struct kvm_hyperv_exit {
>   	} u;
>   };
>   
> +struct kvm_xen_exit {
> +#define KVM_EXIT_XEN_HCALL          1
> +	__u32 type;
> +	union {
> +		struct {
> +			__u32 longmode;
> +			__u32 cpl;
> +			__u64 input;
> +			__u64 result;
> +			__u64 params[6];
> +		} hcall;
> +	} u;
> +};
> +
>   #define KVM_S390_GET_SKEYS_NONE   1
>   #define KVM_S390_SKEYS_MAX        1048576
>   
> @@ -250,6 +264,7 @@ struct kvm_hyperv_exit {
>   #define KVM_EXIT_ARM_NISV         28
>   #define KVM_EXIT_X86_RDMSR        29
>   #define KVM_EXIT_X86_WRMSR        30
> +#define KVM_EXIT_XEN              31
>   
>   /* For KVM_EXIT_INTERNAL_ERROR */
>   /* Emulate instruction failed. */
> @@ -426,6 +441,8 @@ struct kvm_run {
>   			__u32 index; /* kernel -> user */
>   			__u64 data; /* kernel <-> user */
>   		} msr;
> +		/* KVM_EXIT_XEN */
> +		struct kvm_xen_exit xen;
>   		/* Fix the size of the union. */
>   		char padding[256];
>   	};
> @@ -1126,6 +1143,8 @@ struct kvm_x86_mce {
>   #endif
>   
>   #ifdef KVM_CAP_XEN_HVM
> +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
> +
>   struct kvm_xen_hvm_config {
>   	__u32 flags;
>   	__u32 msr;
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index 3d14ef77755e..d94abec627e6 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
>   TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
>   TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
>   TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
> +TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
>   TEST_GEN_PROGS_x86_64 += demand_paging_test
>   TEST_GEN_PROGS_x86_64 += dirty_log_test
>   TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 126c6727a6b0..6e96ae47d28c 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -1654,6 +1654,7 @@ static struct exit_reason {
>   	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
>   	{KVM_EXIT_OSI, "OSI"},
>   	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
> +	{KVM_EXIT_XEN, "XEN"},
>   #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
>   	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
>   #endif
> diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
> new file mode 100644
> index 000000000000..3f1dd93626e5
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
> @@ -0,0 +1,123 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * xen_vmcall_test
> + *
> + * Copyright © 2020 Amazon.com, Inc. or its affiliates.
> + *
> + * Userspace hypercall testing
> + */
> +
> +#include "test_util.h"
> +#include "kvm_util.h"
> +#include "processor.h"
> +
> +#define VCPU_ID		5
> +
> +#define HCALL_REGION_GPA	0xc0000000ULL
> +#define HCALL_REGION_SLOT	10
> +
> +static struct kvm_vm *vm;
> +
> +#define INPUTVALUE 17
> +#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
> +#define RETVALUE 0xcafef00dfbfbffffUL
> +
> +#define XEN_HYPERCALL_MSR 0x40000000
> +
> +static void guest_code(void)
> +{
> +	unsigned long rax = INPUTVALUE;
> +	unsigned long rdi = ARGVALUE(1);
> +	unsigned long rsi = ARGVALUE(2);
> +	unsigned long rdx = ARGVALUE(3);
> +	register unsigned long r10 __asm__("r10") = ARGVALUE(4);
> +	register unsigned long r8 __asm__("r8") = ARGVALUE(5);
> +	register unsigned long r9 __asm__("r9") = ARGVALUE(6);
> +
> +	/* First a direct invocation of 'vmcall' */
> +	__asm__ __volatile__("vmcall" :
> +			     "=a"(rax) :
> +			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
> +			     "r"(r10), "r"(r8), "r"(r9));
> +	GUEST_ASSERT(rax == RETVALUE);
> +
> +	/* Now fill in the hypercall page */
> +	__asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
> +			     "a" (HCALL_REGION_GPA & 0xffffffff),
> +			     "d" (HCALL_REGION_GPA >> 32));
> +
> +	/* And invoke the same hypercall that way */
> +	__asm__ __volatile__("call *%1" : "=a"(rax) :
> +			     "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
> +			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
> +			     "r"(r10), "r"(r8), "r"(r9));
> +	GUEST_ASSERT(rax == RETVALUE);
> +
> +	GUEST_DONE();
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
> +	      KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) {
> +		print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available");
> +		exit(KSFT_SKIP);
> +	}
> +
> +	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
> +	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +
> +	struct kvm_xen_hvm_config hvmc = {
> +		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
> +		.msr = XEN_HYPERCALL_MSR,
> +	};
> +	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
> +
> +	/* Map a region for the hypercall page */
> +	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
> +                                    HCALL_REGION_GPA, HCALL_REGION_SLOT,
> +				    getpagesize(), 0);
> +	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 1, 0);
> +
> +	for (;;) {
> +		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
> +		struct ucall uc;
> +
> +		vcpu_run(vm, VCPU_ID);
> +
> +		if (run->exit_reason == KVM_EXIT_XEN) {
> +			ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
> +			ASSERT_EQ(run->xen.u.hcall.cpl, 0);
> +			ASSERT_EQ(run->xen.u.hcall.longmode, 1);
> +			ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
> +			ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
> +			ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
> +			ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
> +			ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
> +			ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
> +			ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
> +			run->xen.u.hcall.result = RETVALUE;
> +			continue;
> +		}
> +
> +		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
> +			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
> +			    run->exit_reason,
> +			    exit_reason_str(run->exit_reason));
> +
> +		switch (get_ucall(vm, VCPU_ID, &uc)) {
> +		case UCALL_ABORT:
> +			TEST_FAIL("%s", (const char *)uc.args[0]);
> +			/* NOT REACHED */
> +		case UCALL_SYNC:
> +			break;
> +		case UCALL_DONE:
> +			goto done;
> +		default:
> +			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
> +		}
> +	}
> +done:
> +	kvm_vm_free(vm);
> +	return 0;
> +}
>
David Woodhouse Dec. 2, 2020, 8:03 a.m. UTC | #8
On Tue, 2020-12-01 at 21:19 -0800, Ankur Arora wrote:
> > +             for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
> > +                     *(u32 *)&instructions[1] = i;
> > +                     if (kvm_vcpu_write_guest(vcpu,
> > +                                              page_addr + (i * sizeof(instructions)),
> > +                                              instructions, sizeof(instructions)))
> > +                             return 1;
> > +             }
> 
> HYPERVISOR_iret isn't supported on 64bit so should be ud2 instead.

Yeah, I got part way through typing that part but concluded it probably
wasn't a fast path that absolutely needed to be emulated in the kernel.

The VMM can inject the UD# when it receives the hypercall.
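
Roughly along these lines on the VMM side - a sketch only, and the
interaction with the in-kernel completion (which wants to write RAX and
skip the instruction) would still need care:

	#include <sys/ioctl.h>
	#include <errno.h>
	#include <linux/kvm.h>

	#define __HYPERVISOR_iret	23
	#define UD_VECTOR		6

	static void handle_xen_hcall(int vcpu_fd, struct kvm_run *run)
	{
		if (run->xen.u.hcall.input == __HYPERVISOR_iret) {
			struct kvm_vcpu_events events;

			/* Queue a #UD instead of completing the hypercall. */
			ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events);
			events.exception.injected = 1;
			events.exception.nr = UD_VECTOR;
			events.exception.has_error_code = 0;
			ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
			return;
		}

		/* Anything else the VMM doesn't implement: fail the call. */
		run->xen.u.hcall.result = -ENOSYS;
	}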

I appreciate it *is* a guest-visible difference, if we're being really
pedantic, but I don't think we were even going to be able to 100% hide
the fact that it's not actually Xen.
Joao Martins Dec. 2, 2020, 11:17 a.m. UTC | #9
On 12/1/20 11:19 AM, David Woodhouse wrote:
> On Tue, 2020-12-01 at 09:48 +0000, David Woodhouse wrote:
>> So I switched it to generate the hypercall page directly from the
>> kernel, just like we do for the Hyper-V hypercall page.
> 
> Speaking of Hyper-V... I'll post this one now so you can start heckling
> it.
> 
Xen viridian mode is indeed one thing that needed fixing, and it definitely
deserves a separate patch, as you do here. Both this one and the previous one
are looking good.

I suppose that because you switch to kvm_vcpu_write_guest() you no longer need
to validate that the hypercall page is correct, nor mark it as dirty. It's
probably worth making that explicit in the commit message.
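
To make the difference concrete, a rough sketch (not the actual Hyper-V or Xen
MSR handler) of what the open-coded path has to do versus the helper, ignoring
highmem/kmap details:

	#include <linux/kvm_host.h>

	/* Open-coded: translate the gfn, map the page, copy, and mark it dirty. */
	static int write_page_open_coded(struct kvm_vcpu *vcpu, gpa_t gpa,
					 const void *data)
	{
		gfn_t gfn = gpa >> PAGE_SHIFT;
		struct page *page = gfn_to_page(vcpu->kvm, gfn);

		if (is_error_page(page))
			return -EFAULT;

		memcpy(page_address(page), data, PAGE_SIZE);
		kvm_release_page_dirty(page);		/* struct page dirty bit */
		mark_page_dirty(vcpu->kvm, gfn);	/* dirty bitmap for migration */
		return 0;
	}

	/* With the helper, translation, copy and dirty marking are all internal. */
	static int write_page_helper(struct kvm_vcpu *vcpu, gpa_t gpa,
				     const void *data)
	{
		return kvm_vcpu_write_guest(vcpu, gpa, data, PAGE_SIZE);
	}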

> I won't post the rest as I go; not much of the rest of the series when
> I eventually post it will be very new and exciting. It'll just be
> rebasing and tweaking your originals and perhaps adding some tests.
> 

Awesome!!
David Woodhouse Dec. 2, 2020, 12:12 p.m. UTC | #10
On Wed, 2020-12-02 at 11:17 +0000, Joao Martins wrote:
> Xen viridian mode is indeed one thing that needed fixing. And definitely a
> separate patch as you do here. Both this one and the previous is looking good.
> 
> I suppose that because you switch to kvm_vcpu_write_guest() you no longer need
> to validate that the hypercall page is correct neither marking as dirty. Probably
> worth making that explicit in the commit message.

I had intended that to be covered by "open-coding it".

Surely the *point* in using a helper function and not open-coding it is
that we don't *have* to care about the details of what it does ;)
Ankur Arora Dec. 2, 2020, 6:20 p.m. UTC | #11
On 2020-12-02 12:03 a.m., David Woodhouse wrote:
> On Tue, 2020-12-01 at 21:19 -0800, Ankur Arora wrote:
>>> +             for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
>>> +                     *(u32 *)&instructions[1] = i;
>>> +                     if (kvm_vcpu_write_guest(vcpu,
>>> +                                              page_addr + (i * sizeof(instructions)),
>>> +                                              instructions, sizeof(instructions)))
>>> +                             return 1;
>>> +             }
>>
>> HYPERVISOR_iret isn't supported on 64-bit, so it should be ud2 instead.
> 
> Yeah, I got part way through typing that part but concluded it probably
> wasn't a fast path that absolutely needed to be emulated in the kernel.
> 
> The VMM can inject the UD# when it receives the hypercall.

That would work as well but if it's a straight ud2 on the hypercall
page, wouldn't the guest just execute it when/if it does a
HYPERVISOR_iret?

Ankur


> 
> I appreciate it *is* a guest-visible difference, if we're being really
> pedantic, but I don't think we were even going to be able to 100% hide
> the fact that it's not actually Xen.
>
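
To make Ankur's alternative concrete, a rough sketch of how the population
loop quoted above could special-case the __HYPERVISOR_iret slot with ud2 in
the kernel instead of leaving it to the VMM; the byte patterns and slot
spacing are illustrative (the posted patch may differ), and page_addr/vcpu
come from the surrounding function:

	/*
	 * Illustrative only: fill each slot as before, but write ud2 into
	 * the __HYPERVISOR_iret slot (23 in the Xen public headers) so a
	 * 64-bit guest that calls it faults directly rather than reaching
	 * the VMM.  The 32-byte ABI slot spacing vs. sizeof(instructions)
	 * is left aside here.
	 */
	u8 instructions[] = { 0xb8, 0x00, 0x00, 0x00, 0x00,	/* mov $nr, %eax */
			      0x0f, 0x01, 0xc1,			/* vmcall */
			      0xc3 };				/* ret */
	static const u8 ud2[] = { 0x0f, 0x0b };			/* ud2 */
	unsigned int i;

	for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
		u64 addr = page_addr + i * sizeof(instructions);

		if (i == 23) {	/* __HYPERVISOR_iret */
			if (kvm_vcpu_write_guest(vcpu, addr, ud2, sizeof(ud2)))
				return 1;
			continue;
		}
		*(u32 *)&instructions[1] = i;
		if (kvm_vcpu_write_guest(vcpu, addr, instructions,
					 sizeof(instructions)))
			return 1;
	}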

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9417febf8490..0f469ce439c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,7 @@ 
 #define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
 #define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
 #define KVM_REQ_GET_VMCS12_PAGES	KVM_ARCH_REQ(24)
+#define KVM_REQ_XEN_EXIT		KVM_ARCH_REQ(25)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -533,6 +534,11 @@  struct kvm_vcpu_hv {
 	cpumask_t tlb_flush;
 };
 
+/* Xen per vcpu emulation context */
+struct kvm_vcpu_xen {
+	struct kvm_xen_exit exit;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -720,6 +726,7 @@  struct kvm_vcpu_arch {
 	unsigned long singlestep_rip;
 
 	struct kvm_vcpu_hv hyperv;
+	struct kvm_vcpu_xen xen;
 
 	cpumask_var_t wbinvd_dirty_mask;
 
@@ -833,6 +840,11 @@  struct kvm_hv {
 	atomic_t num_mismatched_vp_indexes;
 };
 
+/* Xen emulation context */
+struct kvm_xen {
+	u64 xen_hypercall;
+};
+
 enum kvm_irqchip_mode {
 	KVM_IRQCHIP_NONE,
 	KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
@@ -899,6 +911,7 @@  struct kvm_arch {
 	struct hlist_head mask_notifier_list;
 
 	struct kvm_hv hyperv;
+	struct kvm_xen xen;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 31ecf7a76d5a..2b46c93c9380 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -10,7 +10,7 @@  kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
-			   hyperv.o page_track.o debugfs.o
+			   hyperv.o xen.o page_track.o debugfs.o
 
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
 kvm-amd-y		+= svm.o pmu_amd.o
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6432d08c7de7..4fe9fd86292f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -91,6 +91,39 @@  TRACE_EVENT(kvm_hv_hypercall,
 );
 
 /*
+ * Tracepoint for Xen hypercall.
+ */
+TRACE_EVENT(kvm_xen_hypercall,
+	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
+		 unsigned long a2, unsigned long a3, unsigned long a4),
+	TP_ARGS(nr, a0, a1, a2, a3, a4),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, nr)
+		__field(unsigned long, a0)
+		__field(unsigned long, a1)
+		__field(unsigned long, a2)
+		__field(unsigned long, a3)
+		__field(unsigned long, a4)
+	),
+
+	TP_fast_assign(
+		__entry->nr = nr;
+		__entry->a0 = a0;
+		__entry->a1 = a1;
+		__entry->a2 = a2;
+		__entry->a3 = a3;
+		__entry->a4 = a4;
+	),
+
+	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx",
+		 __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
+		 __entry->a3, __entry->a4)
+);
+
+
+
+/*
  * Tracepoint for PIO.
  */
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47360a4b0d42..be8def385e3f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@ 
 #include "cpuid.h"
 #include "pmu.h"
 #include "hyperv.h"
+#include "xen.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -2338,6 +2339,8 @@  static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
 	}
 	if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
 		goto out_free;
+
+	kvm_xen_hypercall_set(kvm);
 	r = 0;
 out_free:
 	kfree(page);
@@ -7076,6 +7079,9 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	if (kvm_hv_hypercall_enabled(vcpu->kvm))
 		return kvm_hv_hypercall(vcpu);
 
+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
+		return kvm_xen_hypercall(vcpu);
+
 	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
 	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -7736,6 +7742,12 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_XEN_EXIT, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_XEN;
+			vcpu->run->xen = vcpu->arch.xen.exit;
+			r = 0;
+			goto out;
+		}
 
 		/*
 		 * KVM_REQ_HV_STIMER has to be processed after
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
new file mode 100644
index 000000000000..76f0e4b812d2
--- /dev/null
+++ b/arch/x86/kvm/xen.c
@@ -0,0 +1,79 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * KVM Xen emulation
+ */
+
+#include "x86.h"
+#include "xen.h"
+
+#include <linux/kvm_host.h>
+
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+bool kvm_xen_hypercall_enabled(struct kvm *kvm)
+{
+	return READ_ONCE(kvm->arch.xen.xen_hypercall);
+}
+
+bool kvm_xen_hypercall_set(struct kvm *kvm)
+{
+	return WRITE_ONCE(kvm->arch.xen.xen_hypercall, 1);
+}
+
+static void kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
+{
+	kvm_register_write(vcpu, VCPU_REGS_RAX, result);
+}
+
+static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+
+	kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
+{
+	bool longmode;
+	u64 input, params[5];
+
+	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
+
+	longmode = is_64_bit_mode(vcpu);
+	if (!longmode) {
+		params[0] = (u64)kvm_register_read(vcpu, VCPU_REGS_RBX);
+		params[1] = (u64)kvm_register_read(vcpu, VCPU_REGS_RCX);
+		params[2] = (u64)kvm_register_read(vcpu, VCPU_REGS_RDX);
+		params[3] = (u64)kvm_register_read(vcpu, VCPU_REGS_RSI);
+		params[4] = (u64)kvm_register_read(vcpu, VCPU_REGS_RDI);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		params[0] = (u64)kvm_register_read(vcpu, VCPU_REGS_RDI);
+		params[1] = (u64)kvm_register_read(vcpu, VCPU_REGS_RSI);
+		params[2] = (u64)kvm_register_read(vcpu, VCPU_REGS_RDX);
+		params[3] = (u64)kvm_register_read(vcpu, VCPU_REGS_R10);
+		params[4] = (u64)kvm_register_read(vcpu, VCPU_REGS_R8);
+	}
+#endif
+	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
+				params[3], params[4]);
+
+	vcpu->run->exit_reason = KVM_EXIT_XEN;
+	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
+	vcpu->run->xen.u.hcall.input = input;
+	vcpu->run->xen.u.hcall.params[0] = params[0];
+	vcpu->run->xen.u.hcall.params[1] = params[1];
+	vcpu->run->xen.u.hcall.params[2] = params[2];
+	vcpu->run->xen.u.hcall.params[3] = params[3];
+	vcpu->run->xen.u.hcall.params[4] = params[4];
+	vcpu->arch.complete_userspace_io =
+		kvm_xen_hypercall_complete_userspace;
+
+	return 0;
+}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
new file mode 100644
index 000000000000..a2ae079c3ef3
--- /dev/null
+++ b/arch/x86/kvm/xen.h
@@ -0,0 +1,10 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved. */
+#ifndef __ARCH_X86_KVM_XEN_H__
+#define __ARCH_X86_KVM_XEN_H__
+
+bool kvm_xen_hypercall_enabled(struct kvm *kvm);
+bool kvm_xen_hypercall_set(struct kvm *kvm);
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6d4ea4b6c922..d07520c216a1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -204,6 +204,18 @@  struct kvm_hyperv_exit {
 	} u;
 };
 
+struct kvm_xen_exit {
+#define KVM_EXIT_XEN_HCALL          1
+	__u32 type;
+	union {
+		struct {
+			__u64 input;
+			__u64 result;
+			__u64 params[5];
+		} hcall;
+	} u;
+};
+
 #define KVM_S390_GET_SKEYS_NONE   1
 #define KVM_S390_SKEYS_MAX        1048576
 
@@ -235,6 +247,7 @@  struct kvm_hyperv_exit {
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
 #define KVM_EXIT_HYPERV           27
+#define KVM_EXIT_XEN              28
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -392,8 +405,10 @@  struct kvm_run {
 		} eoi;
 		/* KVM_EXIT_HYPERV */
 		struct kvm_hyperv_exit hyperv;
+		/* KVM_EXIT_XEN */
+		struct kvm_xen_exit xen;
 		/* Fix the size of the union. */
-		char padding[256];
+		char padding[196];
 	};
 
 	/* 2048 is the size of the char array used to bound/pad the size