[v19,097/130] KVM: x86: Split core of hypercall emulation to helper function

Message ID d6547bd0c1eccdfb4a4908e330cc56ad39535f5e.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

By necessity, TDX will use a different register ABI for hypercalls.
Break out the core functionality so that it may be reused for TDX.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  4 +++
 arch/x86/kvm/x86.c              | 56 ++++++++++++++++++++++-----------
 2 files changed, 42 insertions(+), 18 deletions(-)
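
To give a concrete feel for how the split is meant to be consumed, a TDX exit
handler could gather the hypercall number and arguments from whatever registers
its guest ABI uses and hand them to the new helper. The sketch below is only an
assumption about that future caller -- the function name, register choice and
return handling are illustrative and not part of this patch:

	/* Hypothetical TDX-side caller; everything here is illustrative only. */
	static int tdx_emulate_vmcall(struct kvm_vcpu *vcpu)
	{
		unsigned long nr, a0, a1, a2, a3, ret;

		/* Assume the TDX guest ABI passes the number/args in r10-r14. */
		nr = kvm_r10_read(vcpu);
		a0 = kvm_r11_read(vcpu);
		a1 = kvm_r12_read(vcpu);
		a2 = kvm_r13_read(vcpu);
		a3 = kvm_r14_read(vcpu);

		/* TDX guests are 64-bit and issue the hypercall from CPL 0. */
		ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, 1, 0);

		/* Report the result back in the same register the guest used. */
		kvm_r10_write(vcpu, ret);
		return 1;
	}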

Comments

Chao Gao March 29, 2024, 3:24 a.m. UTC | #1
On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
>@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> 
> 		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
> 		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
>+		/* stat is incremented on completion. */

Perhaps we could use a distinct return value to signal that the request is redirected
to userspace. This way, more cases can be supported, e.g., accesses to MTRR
MSRs, requests to service TDs, etc. And then ...

> 		return 0;
> 	}
> 	default:
> 		ret = -KVM_ENOSYS;
> 		break;
> 	}
>+
> out:
>+	++vcpu->stat.hypercalls;
>+	return ret;
>+}
>+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
>+
>+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>+{
>+	unsigned long nr, a0, a1, a2, a3, ret;
>+	int op_64_bit;
>+	int cpl;
>+
>+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
>+		return kvm_xen_hypercall(vcpu);
>+
>+	if (kvm_hv_hypercall_enabled(vcpu))
>+		return kvm_hv_hypercall(vcpu);
>+
>+	nr = kvm_rax_read(vcpu);
>+	a0 = kvm_rbx_read(vcpu);
>+	a1 = kvm_rcx_read(vcpu);
>+	a2 = kvm_rdx_read(vcpu);
>+	a3 = kvm_rsi_read(vcpu);
>+	op_64_bit = is_64_bit_hypercall(vcpu);
>+	cpl = static_call(kvm_x86_get_cpl)(vcpu);
>+
>+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
>+	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
>+		/* MAP_GPA tosses the request to the user space. */

no need to check what the request is. Just checking the return value will suffice.

>+		return 0;
>+
> 	if (!op_64_bit)
> 		ret = (u32)ret;
> 	kvm_rax_write(vcpu, ret);
> 
>-	++vcpu->stat.hypercalls;
> 	return kvm_skip_emulated_instruction(vcpu);
> }
> EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
>-- 
>2.25.1
>
>
Isaku Yamahata April 3, 2024, 6:34 p.m. UTC | #2
On Fri, Mar 29, 2024 at 11:24:55AM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
> >@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > 
> > 		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
> > 		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
> >+		/* stat is incremented on completion. */
> 
> Perhaps we could use a distinct return value to signal that the request is redirected
> to userspace. This way, more cases can be supported, e.g., accesses to MTRR
> MSRs, requests to service TDs, etc. And then ...

The convention here is the one that vcpu_enter_guest() already uses for exit
handlers.  If we introduce something like KVM_VCPU_CONTINUE=1 /
KVM_VCPU_EXIT_TO_USER=0, it will touch many places.  So if we do it (I'm not
sure it's worthwhile), the cleanup should be done independently.
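
For reference, a convention like the one floated above would amount to little
more than naming the existing magic values; nothing like this exists in KVM
today, it is only a sketch of the idea:

	/* Purely illustrative names for KVM's existing 1/0 exit-handler returns. */
	#define KVM_VCPU_CONTINUE	1	/* keep running the guest */
	#define KVM_VCPU_EXIT_TO_USER	0	/* exit to userspace to handle the request */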


> > 		return 0;
> > 	}
> > 	default:
> > 		ret = -KVM_ENOSYS;
> > 		break;
> > 	}
> >+
> > out:
> >+	++vcpu->stat.hypercalls;
> >+	return ret;
> >+}
> >+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
> >+
> >+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> >+{
> >+	unsigned long nr, a0, a1, a2, a3, ret;
> >+	int op_64_bit;
> >+	int cpl;
> >+
> >+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
> >+		return kvm_xen_hypercall(vcpu);
> >+
> >+	if (kvm_hv_hypercall_enabled(vcpu))
> >+		return kvm_hv_hypercall(vcpu);
> >+
> >+	nr = kvm_rax_read(vcpu);
> >+	a0 = kvm_rbx_read(vcpu);
> >+	a1 = kvm_rcx_read(vcpu);
> >+	a2 = kvm_rdx_read(vcpu);
> >+	a3 = kvm_rsi_read(vcpu);
> >+	op_64_bit = is_64_bit_hypercall(vcpu);
> >+	cpl = static_call(kvm_x86_get_cpl)(vcpu);
> >+
> >+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> >+	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> >+		/* MAP_GPA tosses the request to the user space. */
> 
> no need to check what the request is. Just checking the return value will suffice.

This is needed to avoid updating rax etc.  KVM_HC_MAP_GPA_RANGE is the only
exception that goes to user space.  This check is a bit weird, but I couldn't
find a better way.

> 
> >+		return 0;
> >+
> > 	if (!op_64_bit)
> > 		ret = (u32)ret;
> > 	kvm_rax_write(vcpu, ret);
> > 
> >-	++vcpu->stat.hypercalls;
> > 	return kvm_skip_emulated_instruction(vcpu);
> > }
> > EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
> >-- 
> >2.25.1
> >
> >
>
Sean Christopherson April 3, 2024, 6:55 p.m. UTC | #3
On Wed, Apr 03, 2024, Isaku Yamahata wrote:
> On Fri, Mar 29, 2024 at 11:24:55AM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
> 
> > On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
> > >@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > > 
> > > 		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
> > > 		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
> > >+		/* stat is incremented on completion. */
> > 
> > Perhaps we could use a distinct return value to signal that the request is redirected
> > to userspace. This way, more cases can be supported, e.g., accesses to MTRR
> > MSRs, requests to service TDs, etc. And then ...
> 
> The convention here is the one that vcpu_enter_guest() already uses for exit
> handlers.  If we introduce something like KVM_VCPU_CONTINUE=1 /
> KVM_VCPU_EXIT_TO_USER=0, it will touch many places.  So if we do it (I'm not
> sure it's worthwhile), the cleanup should be done independently.

Yeah, this is far from the first time that someone has complained about KVM's
awful 1/0 return magic.  And every time we've looked at it, we've come to the
conclusion that it's not worth the churn/risk.

And if we really need to further overload the return value, we can, e.g. KVM
already does this for MSR accesses:

/*
 * Internal error codes that are used to indicate that MSR emulation encountered
 * an error that should result in #GP in the guest, unless userspace
 * handles it.
 */
#define  KVM_MSR_RET_INVALID	2	/* in-kernel MSR emulation #GP condition */
#define  KVM_MSR_RET_FILTERED	3	/* #GP due to userspace MSR filter */
Binbin Wu April 9, 2024, 9:28 a.m. UTC | #4
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> By necessity, TDX will use a different register ABI for hypercalls.
> Break out the core functionality so that it may be reused for TDX.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  4 +++
>   arch/x86/kvm/x86.c              | 56 ++++++++++++++++++++++-----------
>   2 files changed, 42 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e0ffef1d377d..bb8be091f996 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2177,6 +2177,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
>   	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
>   }
>   
> +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> +				      unsigned long a0, unsigned long a1,
> +				      unsigned long a2, unsigned long a3,
> +				      int op_64_bit, int cpl);
>   int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>   
>   int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fb7597c22f31..03950368d8db 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10073,26 +10073,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
>   	return kvm_skip_emulated_instruction(vcpu);
>   }
>   
> -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> +				      unsigned long a0, unsigned long a1,
> +				      unsigned long a2, unsigned long a3,
> +				      int op_64_bit, int cpl)
>   {
> -	unsigned long nr, a0, a1, a2, a3, ret;
> -	int op_64_bit;
> -
> -	if (kvm_xen_hypercall_enabled(vcpu->kvm))
> -		return kvm_xen_hypercall(vcpu);
> -
> -	if (kvm_hv_hypercall_enabled(vcpu))
> -		return kvm_hv_hypercall(vcpu);
> -
> -	nr = kvm_rax_read(vcpu);
> -	a0 = kvm_rbx_read(vcpu);
> -	a1 = kvm_rcx_read(vcpu);
> -	a2 = kvm_rdx_read(vcpu);
> -	a3 = kvm_rsi_read(vcpu);
> +	unsigned long ret;
>   
>   	trace_kvm_hypercall(nr, a0, a1, a2, a3);
>   
> -	op_64_bit = is_64_bit_hypercall(vcpu);
>   	if (!op_64_bit) {
>   		nr &= 0xFFFFFFFF;
>   		a0 &= 0xFFFFFFFF;
> @@ -10101,7 +10090,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>   		a3 &= 0xFFFFFFFF;
>   	}
>   
> -	if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
> +	if (cpl) {
>   		ret = -KVM_EPERM;
>   		goto out;
>   	}
> @@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>   
>   		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
>   		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
> +		/* stat is incremented on completion. */
>   		return 0;
>   	}
>   	default:
>   		ret = -KVM_ENOSYS;
>   		break;
>   	}
> +
>   out:
> +	++vcpu->stat.hypercalls;
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
> +
> +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long nr, a0, a1, a2, a3, ret;
> +	int op_64_bit;

Can it be opportunistically changed to bool type, as well as the 
argument type of "op_64_bit" in __kvm_emulate_hypercall()?

> +	int cpl;
> +
> +	if (kvm_xen_hypercall_enabled(vcpu->kvm))
> +		return kvm_xen_hypercall(vcpu);
> +
> +	if (kvm_hv_hypercall_enabled(vcpu))
> +		return kvm_hv_hypercall(vcpu);
> +
> +	nr = kvm_rax_read(vcpu);
> +	a0 = kvm_rbx_read(vcpu);
> +	a1 = kvm_rcx_read(vcpu);
> +	a2 = kvm_rdx_read(vcpu);
> +	a3 = kvm_rsi_read(vcpu);
> +	op_64_bit = is_64_bit_hypercall(vcpu);
> +	cpl = static_call(kvm_x86_get_cpl)(vcpu);
> +
> +	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> +	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> +		/* MAP_GPA tosses the request to the user space. */
> +		return 0;
> +
>   	if (!op_64_bit)
>   		ret = (u32)ret;
>   	kvm_rax_write(vcpu, ret);
>   
> -	++vcpu->stat.hypercalls;
>   	return kvm_skip_emulated_instruction(vcpu);
>   }
>   EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
Isaku Yamahata April 15, 2024, 10:51 p.m. UTC | #5
On Tue, Apr 09, 2024 at 05:28:05PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> > +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > +{
> > +	unsigned long nr, a0, a1, a2, a3, ret;
> > +	int op_64_bit;
> 
> Can it be opportunistically changed to bool type, as well as the argument
> type of "op_64_bit" in __kvm_emulate_hypercall()?

Yes. We can also fix kvm_pv_send_ipi(op_64_bit).
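
A minimal sketch of that follow-up (only the type change discussed here;
nothing else from this patch is altered) might read:

	/* Illustrative prototype with the suggested bool parameter. */
	unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
					      unsigned long a0, unsigned long a1,
					      unsigned long a2, unsigned long a3,
					      bool op_64_bit, int cpl);

Callers would then declare op_64_bit as bool as well; is_64_bit_hypercall()
already yields a boolean, so no conversion should be needed at the call site.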
Binbin Wu May 9, 2024, 3:26 a.m. UTC | #6
On 4/4/2024 2:34 AM, Isaku Yamahata wrote:
> On Fri, Mar 29, 2024 at 11:24:55AM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
>
>> On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
>>> +
>>> +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>>> +{
>>> +	unsigned long nr, a0, a1, a2, a3, ret;
>>> +	int op_64_bit;
>>> +	int cpl;
>>> +
>>> +	if (kvm_xen_hypercall_enabled(vcpu->kvm))
>>> +		return kvm_xen_hypercall(vcpu);
>>> +
>>> +	if (kvm_hv_hypercall_enabled(vcpu))
>>> +		return kvm_hv_hypercall(vcpu);
>>> +
>>> +	nr = kvm_rax_read(vcpu);
>>> +	a0 = kvm_rbx_read(vcpu);
>>> +	a1 = kvm_rcx_read(vcpu);
>>> +	a2 = kvm_rdx_read(vcpu);
>>> +	a3 = kvm_rsi_read(vcpu);
>>> +	op_64_bit = is_64_bit_hypercall(vcpu);
>>> +	cpl = static_call(kvm_x86_get_cpl)(vcpu);
>>> +
>>> +	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
>>> +	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
>>> +		/* MAP_GPA tosses the request to the user space. */
>> no need to check what the request is. Just checking the return value will suffice.
> This is needed to avoid updating rax etc.  KVM_HC_MAP_GPA_RANGE is the only
> exception that goes to user space.  This check is a bit weird, but I couldn't
> find a better way.

To be generic, I think we can use
"vcpu->kvm->arch.hypercall_exit_enabled & (1 << nr)" to check if it 
needs to exit to userspace.

i.e.,

+       ...
+       ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
+       if (!ret && (vcpu->kvm->arch.hypercall_exit_enabled & (1 << nr)))
+               /* The hypercall is requested to exit to userspace. */
+               return 0;

>
>>> +		return 0;
>>> +
>>> 	if (!op_64_bit)
>>> 		ret = (u32)ret;
>>> 	kvm_rax_write(vcpu, ret);
>>>
>>> -	++vcpu->stat.hypercalls;
>>> 	return kvm_skip_emulated_instruction(vcpu);
>>> }
>>> EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
>>> -- 
>>> 2.25.1
>>>
>>>

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0ffef1d377d..bb8be091f996 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2177,6 +2177,10 @@  static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
 	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
 }
 
+unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+				      unsigned long a0, unsigned long a1,
+				      unsigned long a2, unsigned long a3,
+				      int op_64_bit, int cpl);
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb7597c22f31..03950368d8db 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10073,26 +10073,15 @@  static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+				      unsigned long a0, unsigned long a1,
+				      unsigned long a2, unsigned long a3,
+				      int op_64_bit, int cpl)
 {
-	unsigned long nr, a0, a1, a2, a3, ret;
-	int op_64_bit;
-
-	if (kvm_xen_hypercall_enabled(vcpu->kvm))
-		return kvm_xen_hypercall(vcpu);
-
-	if (kvm_hv_hypercall_enabled(vcpu))
-		return kvm_hv_hypercall(vcpu);
-
-	nr = kvm_rax_read(vcpu);
-	a0 = kvm_rbx_read(vcpu);
-	a1 = kvm_rcx_read(vcpu);
-	a2 = kvm_rdx_read(vcpu);
-	a3 = kvm_rsi_read(vcpu);
+	unsigned long ret;
 
 	trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
-	op_64_bit = is_64_bit_hypercall(vcpu);
 	if (!op_64_bit) {
 		nr &= 0xFFFFFFFF;
 		a0 &= 0xFFFFFFFF;
@@ -10101,7 +10090,7 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		a3 &= 0xFFFFFFFF;
 	}
 
-	if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
+	if (cpl) {
 		ret = -KVM_EPERM;
 		goto out;
 	}
@@ -10162,18 +10151,49 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
 		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
 		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
+		/* stat is incremented on completion. */
 		return 0;
 	}
 	default:
 		ret = -KVM_ENOSYS;
 		break;
 	}
+
 out:
+	++vcpu->stat.hypercalls;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
+
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr, a0, a1, a2, a3, ret;
+	int op_64_bit;
+	int cpl;
+
+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
+		return kvm_xen_hypercall(vcpu);
+
+	if (kvm_hv_hypercall_enabled(vcpu))
+		return kvm_hv_hypercall(vcpu);
+
+	nr = kvm_rax_read(vcpu);
+	a0 = kvm_rbx_read(vcpu);
+	a1 = kvm_rcx_read(vcpu);
+	a2 = kvm_rdx_read(vcpu);
+	a3 = kvm_rsi_read(vcpu);
+	op_64_bit = is_64_bit_hypercall(vcpu);
+	cpl = static_call(kvm_x86_get_cpl)(vcpu);
+
+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
+	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
+		/* MAP_GPA tosses the request to the user space. */
+		return 0;
+
 	if (!op_64_bit)
 		ret = (u32)ret;
 	kvm_rax_write(vcpu, ret);
 
-	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);