diff mbox series

[6/9] KVM: x86: Update guest cpu_caps at runtime for dynamic CPUID-based features

Message ID 20231110235528.1561679-7-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Replace governed features with guest cpu_caps | expand

Commit Message

Sean Christopherson Nov. 10, 2023, 11:55 p.m. UTC
When updating guest CPUID entries to emulate runtime behavior, e.g. when
the guest enables a CR4-based feature that is tied to a CPUID flag, also
update the vCPU's cpu_caps accordingly.  This will allow replacing all
usage of guest_cpuid_has() with guest_cpu_cap_has().

Take care not to update guest capabilities when KVM is updating CPUID
entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
bogus CPUID information.  No extra call to update cpu_caps is needed as
the cpu_caps are initialized from the incoming guest CPUID, i.e. will
automatically get the updated values.

Note, none of the features in question use guest_cpu_cap_has() at this
time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 14 deletions(-)

Comments

Robert Hoo Nov. 13, 2023, 8:03 a.m. UTC | #1
On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly.  This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
> 
> Take care not to update guest capabilities when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> bogus CPUID information.  No extra call to update cpu_caps is needed as
> the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> automatically get the updated values.
> 
> Note, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
>   1 file changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 36bd04030989..37a991439fe6 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
>   	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
>   }
>   
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> +						       struct kvm_cpuid_entry2 *entry,
> +						       unsigned int x86_feature,
> +						       bool has_feature)
> +{
> +	if (entry)
> +		cpuid_entry_change(entry, x86_feature, has_feature);
> +
> +	if (vcpu)
> +		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
>   static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
>   				       int nent)
>   {
>   	struct kvm_cpuid_entry2 *best;
> +	struct kvm_vcpu *caps = vcpu;

u32 *caps  = vcpu->arch.cpu_caps;
and update guest_cpu_cap_set(), guest_cpu_cap_clear(), guest_cpu_cap_change() 
and guest_cpu_cap_restrict() to pass in vcpu->arch.cpu_caps instead of vcpu, 
since all of them merely refer to vcpu cap, rather than whole vcpu info.

Or, for simple change, here rename variable name "caps" --> "vcpu", to less 
reading confusion.

> +
> +	/*
> +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> +	 * are coming in from userspace!
> +	 */
> +	if (entries != vcpu->arch.cpuid_entries)
> +		caps = NULL;
>   
>   	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> -	if (best) {
> -		/* Update OSXSAVE bit */
> -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> +
> +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
>   					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>   
> -		cpuid_entry_change(best, X86_FEATURE_APIC,
> -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>   
> -		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> -			cpuid_entry_change(best, X86_FEATURE_MWAIT,
> -					   vcpu->arch.ia32_misc_enable_msr &
> -					   MSR_IA32_MISC_ENABLE_MWAIT);
> -	}
> +	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> +					   vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
>   
>   	best = cpuid_entry2_find(entries, nent, 7, 0);
> -	if (best && boot_cpu_has(X86_FEATURE_PKU))
> -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> +	if (boot_cpu_has(X86_FEATURE_PKU))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
>   
>   	best = cpuid_entry2_find(entries, nent, 0xD, 0);
>   	if (best)
> @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>   	 * Reset guest capabilities to userspace's guest CPUID definition, i.e.
>   	 * honor userspace's definition for features that don't require KVM or
>   	 * hardware management/support (or that KVM simply doesn't care about).
> +	 *
> +	 * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> +	 * will also correctly set runtime features in guest CPU capabilities.
>   	 */
>   	for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
>   		const struct cpuid_reg cpuid = reverse_cpuid[i];
Sean Christopherson Nov. 14, 2023, 1:48 p.m. UTC | #2
On Mon, Nov 13, 2023, Robert Hoo wrote:
> On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> > When updating guest CPUID entries to emulate runtime behavior, e.g. when
> > the guest enables a CR4-based feature that is tied to a CPUID flag, also
> > update the vCPU's cpu_caps accordingly.  This will allow replacing all
> > usage of guest_cpuid_has() with guest_cpu_cap_has().
> > 
> > Take care not to update guest capabilities when KVM is updating CPUID
> > entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> > bogus CPUID information.  No extra call to update cpu_caps is needed as
> > the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> > automatically get the updated values.
> > 
> > Note, none of the features in question use guest_cpu_cap_has() at this
> > time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >   arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> >   1 file changed, 34 insertions(+), 14 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 36bd04030989..37a991439fe6 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> >   	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> >   }
> > +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> > +						       struct kvm_cpuid_entry2 *entry,
> > +						       unsigned int x86_feature,
> > +						       bool has_feature)
> > +{
> > +	if (entry)
> > +		cpuid_entry_change(entry, x86_feature, has_feature);
> > +
> > +	if (vcpu)
> > +		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> > +}
> > +
> >   static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> >   				       int nent)
> >   {
> >   	struct kvm_cpuid_entry2 *best;
> > +	struct kvm_vcpu *caps = vcpu;
> 
> u32 *caps  = vcpu->arch.cpu_caps;
> and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
> guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
> vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
> cap, rather than whole vcpu info.

No, because then every caller would need extra code to pass vcpu->cpu_caps, and
passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'.  That tradeoff
isn't worth making this one path slightly easier to read.

> Or, for simple change, here rename variable name "caps" --> "vcpu", to less
> reading confusion.

@vcpu is already defined and needs to be used in this function.  See the comment
below.

I'm definitely open to a better name, though I would like to keep the name
relative short so that the line lengths of the callers is reasonable, e.g. would
prefer not to do vcpu_caps.

> > +	/*
> > +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > +	 * are coming in from userspace!
> > +	 */
> > +	if (entries != vcpu->arch.cpuid_entries)
> > +		caps = NULL;
> >   	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > -	if (best) {
> > -		/* Update OSXSAVE bit */
> > -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> > -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > +
> > +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> >   					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> > -		cpuid_entry_change(best, X86_FEATURE_APIC,
> > -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
Robert Hoo Nov. 15, 2023, 1:59 a.m. UTC | #3
On 11/14/2023 9:48 PM, Sean Christopherson wrote:
> On Mon, Nov 13, 2023, Robert Hoo wrote:
...
>> u32 *caps  = vcpu->arch.cpu_caps;
>> and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
>> guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
>> vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
>> cap, rather than whole vcpu info.
> 
> No, because then every caller would need extra code to pass vcpu->cpu_caps, 

Emm, I don't understand this. I tried to modified and compiled, all need to do 
is simply substitute "vcpu" with "vcpu->arch.cpu_caps" in calling. (at the end 
is my diff based on this patch set)

> and
> passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'.  That tradeoff
> isn't worth making this one path slightly easier to read.

My point is also from vulnerability, long term, since as a principle, we'd 
better pass in param/info to a function of its necessity. e.g. cpuid_entry2_find().
Anyway, this is a less important point, shouldn't distract your focus.

This patch set's whole idea is good, I also felt confusion when initially 
looking into vCPUID code and its complicated dependencies with each other and 
KVM cap (or your word govern)  ( and even Kernel govern and HW cap ?). With this 
guest_cap[], the layered relationship can be much clearer, alone with fast guest 
cap queries.
> 
>> Or, for simple change, here rename variable name "caps" --> "vcpu", to less
>> reading confusion.
> 
> @vcpu is already defined and needs to be used in this function.  See the comment
> below.
> 
> I'm definitely open to a better name, though I would like to keep the name
> relative short so that the line lengths of the callers is reasonable, e.g. would
> prefer not to do vcpu_caps.
> 
>>> +	/*
>>> +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
>>> +	 * are coming in from userspace!
>>> +	 */
>>> +	if (entries != vcpu->arch.cpuid_entries)
>>> +		caps = NULL;
>>>    	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>>> -	if (best) {
>>> -		/* Update OSXSAVE bit */
>>> -		if (boot_cpu_has(X86_FEATURE_XSAVE))
>>> -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
>>> +
>>> +	if (boot_cpu_has(X86_FEATURE_XSAVE))
>>> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
>>>    					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>>> -		cpuid_entry_change(best, X86_FEATURE_APIC,
>>> -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>>> +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
>>> +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6407e5c45f20..3e8976705342 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -262,7 +262,7 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 
*entries, int nent)
         return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
  }

-static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+static __always_inline void kvm_update_feature_runtime(u32 *guest_caps,
                                                        struct kvm_cpuid_entry2 
*entry,
                                                        unsigned int x86_feature,
                                                        bool has_feature)
@@ -270,15 +270,15 @@ static __always_inline void 
kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
         if (entry)
                 cpuid_entry_change(entry, x86_feature, has_feature);

-       if (vcpu)
-               guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+       if (guest_caps)
+               guest_cpu_cap_change(guest_caps, x86_feature, has_feature);
  }

  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct 
kvm_cpuid_entry2 *entries,
                                        int nent)
  {
         struct kvm_cpuid_entry2 *best;
-       struct kvm_vcpu *caps = vcpu;
+       u32 *caps = vcpu->arch.cpu_caps;

         /*
          * Don't update vCPU capabilities if KVM is updating CPUID entries that
@@ -397,7 +397,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          */
         allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
                                       guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES);
-       guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
+       guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_GBPAGES, 
allow_gbpages);

         best = kvm_find_cpuid_entry(vcpu, 1);
         if (best && apic) {
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 98694dfe062e..a3a0482fc514 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -183,39 +183,39 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu 
*vcpu,
         return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
  }

-static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_set(u32 *caps,
                                               unsigned int x86_feature)
  {
         unsigned int x86_leaf = __feature_leaf(x86_feature);

         reverse_cpuid_check(x86_leaf);
-       vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+       caps[x86_leaf] |= __feature_bit(x86_feature);
  }

-static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_clear(u32 *caps,
                                                 unsigned int x86_feature)
  {
         unsigned int x86_leaf = __feature_leaf(x86_feature);

         reverse_cpuid_check(x86_leaf);
-       vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+       caps[x86_leaf] &= ~__feature_bit(x86_feature);
  }

-static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_change(u32 *caps,
                                                  unsigned int x86_feature,
                                                  bool guest_has_cap)
  {
         if (guest_has_cap)
-               guest_cpu_cap_set(vcpu, x86_feature);
+               guest_cpu_cap_set(caps, x86_feature);
         else
-               guest_cpu_cap_clear(vcpu, x86_feature);
+               guest_cpu_cap_clear(caps, x86_feature);
  }

-static __always_inline void guest_cpu_cap_restrict(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_restrict(u32 *caps,
                                                    unsigned int x86_feature)
  {
         if (!kvm_cpu_cap_has(x86_feature))
-               guest_cpu_cap_clear(vcpu, x86_feature);
+               guest_cpu_cap_clear(caps, x86_feature);
  }

  static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6fe2d7bf4959..dd4ca07c3cd0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4315,14 +4315,14 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
          * the guest read/write access to the host's XSS.
          */
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
-       guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
+       guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES,
                              boot_cpu_has(X86_FEATURE_XSAVES) &&
                              guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));

-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_NRIPS);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_TSCRATEMSR);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_LBRV);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_NRIPS);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_TSCRATEMSR);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_LBRV);

         /*
          * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
@@ -4330,12 +4330,12 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          * SVM on Intel is bonkers and extremely unlikely to work).
          */
         if (!guest_cpuid_is_intel(vcpu))
-               guest_cpu_cap_restrict(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
+               guest_cpu_cap_restrict(vcpu->arch.cpu_caps, 
X86_FEATURE_V_VMSAVE_VMLOAD);

-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI);

         svm_recalc_instruction_intercepts(vcpu, svm);

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7645945af5c5..c23c96dc24cf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7752,13 +7752,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
          * set if and only if XSAVE is supported.
          */
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
         if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE))
-               guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVES);
+               guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);
         else
-               guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
+               guest_cpu_cap_clear(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);

-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_VMX);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VMX);

         vmx_setup_uret_msrs(vmx);
Sean Christopherson Nov. 15, 2023, 3:09 p.m. UTC | #4
On Wed, Nov 15, 2023, Robert Hoo wrote:
> On 11/14/2023 9:48 PM, Sean Christopherson wrote:
> > On Mon, Nov 13, 2023, Robert Hoo wrote:
> ...
> > > u32 *caps  = vcpu->arch.cpu_caps;
> > > and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
> > > guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
> > > vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
> > > cap, rather than whole vcpu info.
> > 
> > No, because then every caller would need extra code to pass
> > vcpu->cpu_caps,
> 
> Emm, I don't understand this. I tried to modified and compiled, all need to
> do is simply substitute "vcpu" with "vcpu->arch.cpu_caps" in calling. (at
> the end is my diff based on this patch set)

Yes, and I'm saying that

	guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER);
	guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD);
	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF);
	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI);

is harder to read and write than this

	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER);
	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD);
	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF);
	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI);

a one-time search-replace is easy, but the extra boilerplate has a non-zero cost
for every future developer/reader.

> > and passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'.
> > That tradeoff isn't worth making this one path slightly easier to read.
> 
> My point is also from vulnerability, long term, since as a principle, we'd
> better pass in param/info to a function of its necessity.

Attempting to apply the principle of least privilege to low level C helpers is
nonsensical.  E.g. the helper can trivially get at the owning vcpu via container_of()
(well, if not for typeof assertions not playing nice with arrays, but open coding
container_of() is also trivial and illustrates the point).

	struct kvm_vcpu_arch *arch = (void *)caps -  offsetof(struct kvm_vcpu_arch, cpu_caps);
	struct kvm_vcpu *vcpu = container_of(arch, struct kvm_vcpu, arch);

	if (!kvm_cpu_cap_has(x86_feature))
		guest_cpu_cap_clear(vcpu, x86_feature);

And the intent behind that principle is to improve security/robustness; what I'm
saying is that passing in a 'u32 *" makes the overall implementation _less_ robust,
as it opens up the possibilities of passing in an unsafe/incorrect pointer.  E.g.
a well-intentioned, not _that_ obviously broken example is:

	guest_cpu_cap_restrict(&vcpu->arch.cpu_caps[CPUID_1_ECX], X86_FEATURE_XSAVE);

> e.g. cpuid_entry2_find().

The main reason cpuid_entry2_find() exists is because KVM checks the incoming
array provided by KVM_SET_CPUID2, which is also the reason why
__kvm_update_cpuid_runtime() takes an @entries array instead of just @vcpu.
Yang Weijiang Nov. 16, 2023, 2:24 a.m. UTC | #5
On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly.  This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
>
> Take care not to update guest capabilities when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> bogus CPUID information.  No extra call to update cpu_caps is needed as
> the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> automatically get the updated values.
>
> Note, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
>   1 file changed, 34 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 36bd04030989..37a991439fe6 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
>   	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
>   }
>   
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> +						       struct kvm_cpuid_entry2 *entry,
> +						       unsigned int x86_feature,
> +						       bool has_feature)
> +{
> +	if (entry)
> +		cpuid_entry_change(entry, x86_feature, has_feature);
> +
> +	if (vcpu)
> +		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
>   static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
>   				       int nent)
>   {
>   	struct kvm_cpuid_entry2 *best;
> +	struct kvm_vcpu *caps = vcpu;
> +
> +	/*
> +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> +	 * are coming in from userspace!
> +	 */
> +	if (entries != vcpu->arch.cpuid_entries)
> +		caps = NULL;

Nit, why here we use caps instead of vcpu? Looks a bit weird.

>   
>   	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> -	if (best) {
> -		/* Update OSXSAVE bit */
> -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> +
> +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
>   					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>   
> -		cpuid_entry_change(best, X86_FEATURE_APIC,
> -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>   
> -		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> -			cpuid_entry_change(best, X86_FEATURE_MWAIT,
> -					   vcpu->arch.ia32_misc_enable_msr &
> -					   MSR_IA32_MISC_ENABLE_MWAIT);
> -	}
> +	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> +					   vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);

 > 80 characters?

>   
>   	best = cpuid_entry2_find(entries, nent, 7, 0);
> -	if (best && boot_cpu_has(X86_FEATURE_PKU))
> -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> +	if (boot_cpu_has(X86_FEATURE_PKU))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
>   
>   	best = cpuid_entry2_find(entries, nent, 0xD, 0);
>   	if (best)
> @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>   	 * Reset guest capabilities to userspace's guest CPUID definition, i.e.
>   	 * honor userspace's definition for features that don't require KVM or
>   	 * hardware management/support (or that KVM simply doesn't care about).
> +	 *
> +	 * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> +	 * will also correctly set runtime features in guest CPU capabilities.
>   	 */
>   	for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
>   		const struct cpuid_reg cpuid = reverse_cpuid[i];
Reviewed-by: Yang Weijiang <weijiang.yang@intel.com>
Sean Christopherson Nov. 16, 2023, 10:19 p.m. UTC | #6
On Thu, Nov 16, 2023, Weijiang Yang wrote:
> On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> >   static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> >   				       int nent)
> >   {
> >   	struct kvm_cpuid_entry2 *best;
> > +	struct kvm_vcpu *caps = vcpu;
> > +
> > +	/*
> > +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > +	 * are coming in from userspace!
> > +	 */
> > +	if (entries != vcpu->arch.cpuid_entries)
> > +		caps = NULL;
> 
> Nit, why here we use caps instead of vcpu? Looks a bit weird.

See my response to Robert: https://lore.kernel.org/all/9395d416-cc5c-536d-641e-ffd971b682d1@gmail.com

> >   	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > -	if (best) {
> > -		/* Update OSXSAVE bit */
> > -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> > -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > +
> > +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> >   					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> > -		cpuid_entry_change(best, X86_FEATURE_APIC,
> > -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > -		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > -			cpuid_entry_change(best, X86_FEATURE_MWAIT,
> > -					   vcpu->arch.ia32_misc_enable_msr &
> > -					   MSR_IA32_MISC_ENABLE_MWAIT);
> > -	}
> > +	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> > +					   vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
> 
> > 80 characters?

Hmm, yeah, I suppose.
Robert Hoo Nov. 17, 2023, 1:28 a.m. UTC | #7
On 11/15/2023 11:09 PM, Sean Christopherson wrote:
...
>>> No, because then every caller would need extra code to pass
>>> vcpu->cpu_caps,
>>
>> Emm, I don't understand this. I tried to modified and compiled, all need to
>> do is simply substitute "vcpu" with "vcpu->arch.cpu_caps" in calling. (at
>> the end is my diff based on this patch set)
> 
> Yes, and I'm saying that
> 
> 	guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER);
> 	guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD);
> 	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF);
> 	guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI);
> 
> is harder to read and write than this
> 
> 	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER);
> 	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD);
> 	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF);
> 	guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI);
> 
> a one-time search-replace is easy, but the extra boilerplate has a non-zero cost
> for every future developer/reader.

Hmm, I think this is trivial. And can be solved/eased by other means, e.g. 
Macro?. Rather than in the sacrifice of letting function's inside (easily) 
access those info it shouldn't.
> 
>>> and passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'.
>>> That tradeoff isn't worth making this one path slightly easier to read.
>>
>> My point is also from vulnerability, long term, since as a principle, we'd
>> better pass in param/info to a function of its necessity.
> 
> Attempting to apply the principle of least privilege to low level C helpers is
> nonsensical.  E.g. the helper can trivially get at the owning vcpu via container_of()
> (well, if not for typeof assertions not playing nice with arrays, but open coding
> container_of() is also trivial and illustrates the point).
> 
> 	struct kvm_vcpu_arch *arch = (void *)caps -  offsetof(struct kvm_vcpu_arch, cpu_caps);
> 	struct kvm_vcpu *vcpu = container_of(arch, struct kvm_vcpu, arch);
> 
> 	if (!kvm_cpu_cap_has(x86_feature))
> 		guest_cpu_cap_clear(vcpu, x86_feature);
> 
> And the intent behind that principle is to improve security/robustness; what I'm
> saying is that passing in a 'u32 *" makes the overall implementation _less_ robust,
> as it opens up the possibilities of passing in an unsafe/incorrect pointer.  E.g.
> a well-intentioned, not _that_ obviously broken example is:
> 
> 	guest_cpu_cap_restrict(&vcpu->arch.cpu_caps[CPUID_1_ECX], X86_FEATURE_XSAVE);
> 
>> e.g. cpuid_entry2_find().
> 
> The main reason cpuid_entry2_find() exists is because KVM checks the incoming
> array provided by KVM_SET_CPUID2, which is also the reason why
> __kvm_update_cpuid_runtime() takes an @entries array instead of just @vcpu.

Thanks for detailed explanation, I understand your points deeper, though I would 
still prefer to honoring the principle if it was me to write the function. The 
concerns above can/should be addressed by other means. (If some really cannot be 
solved in C, i.e. more stringent type check, it's C to blame ;) but it on the 
other side offers those flexibility that other languages cannot, doesn't it?)
Another pros of the principle is that, it's also a fence, prevent (at least 
raise the bar) people in the future from doing something that shouldn't be in 
the function, e.g.  for his convenience to quickly fix a bug etc.

Anyway, it's a dilemma, and I said it's a less important point for this great 
progress of vCPUID's implementation, thanks.

Reviewed-by: Robert Hoo <robert.hoo.linux@gmail.com>
Maxim Levitsky Nov. 19, 2023, 5:35 p.m. UTC | #8
On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly.  This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
> 
> Take care not to update guest capabilities when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> bogus CPUID information.  No extra call to update cpu_caps is needed as
> the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> automatically get the updated values.
> 
> Note, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
>  1 file changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 36bd04030989..37a991439fe6 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
>  	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
>  }
>  
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> +						       struct kvm_cpuid_entry2 *entry,
> +						       unsigned int x86_feature,
> +						       bool has_feature)
> +{
> +	if (entry)
> +		cpuid_entry_change(entry, x86_feature, has_feature);
> +
> +	if (vcpu)
> +		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
>  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
>  				       int nent)
>  {
>  	struct kvm_cpuid_entry2 *best;
> +	struct kvm_vcpu *caps = vcpu;
> +
> +	/*
> +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> +	 * are coming in from userspace!
> +	 */
> +	if (entries != vcpu->arch.cpuid_entries)
> +		caps = NULL;

I think that this should be decided by the caller. Just a boolean will suffice.

Or even better: since the userspace CPUID update is really not important in terms of performance,
why to special case it? 

Even if these guest caps are later overwritten, I don't see why we
need to avoid updating them, and in fact introduce a small risk of them not being consistent
with the other cpu caps.

With this we can avoid having the 'cap' variable which is *very* confusing as well.


>  
>  	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> -	if (best) {
> -		/* Update OSXSAVE bit */
> -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> +
> +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
>  					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>  
> -		cpuid_entry_change(best, X86_FEATURE_APIC,
> -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>  
> -		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> -			cpuid_entry_change(best, X86_FEATURE_MWAIT,
> -					   vcpu->arch.ia32_misc_enable_msr &
> -					   MSR_IA32_MISC_ENABLE_MWAIT);
> -	}
> +	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> +					   vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
>  
>  	best = cpuid_entry2_find(entries, nent, 7, 0);
> -	if (best && boot_cpu_has(X86_FEATURE_PKU))
> -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> +	if (boot_cpu_has(X86_FEATURE_PKU))
> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
>  
>  	best = cpuid_entry2_find(entries, nent, 0xD, 0);
>  	if (best)
> @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>  	 * Reset guest capabilities to userspace's guest CPUID definition, i.e.
>  	 * honor userspace's definition for features that don't require KVM or
>  	 * hardware management/support (or that KVM simply doesn't care about).
> +	 *
> +	 * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> +	 * will also correctly set runtime features in guest CPU capabilities.
>  	 */
>  	for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
>  		const struct cpuid_reg cpuid = reverse_cpuid[i];


Best regards,
	Maxim Levitsky
Xu Yilun Nov. 24, 2023, 6:33 a.m. UTC | #9
On Sun, Nov 19, 2023 at 07:35:30PM +0200, Maxim Levitsky wrote:
> On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> > When updating guest CPUID entries to emulate runtime behavior, e.g. when
> > the guest enables a CR4-based feature that is tied to a CPUID flag, also
> > update the vCPU's cpu_caps accordingly.  This will allow replacing all
> > usage of guest_cpuid_has() with guest_cpu_cap_has().
> > 
> > Take care not to update guest capabilities when KVM is updating CPUID
> > entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> > bogus CPUID information.  No extra call to update cpu_caps is needed as
> > the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> > automatically get the updated values.
> > 
> > Note, none of the features in question use guest_cpu_cap_has() at this
> > time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >  arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> >  1 file changed, 34 insertions(+), 14 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 36bd04030989..37a991439fe6 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> >  	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> >  }
> >  
> > +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> > +						       struct kvm_cpuid_entry2 *entry,
> > +						       unsigned int x86_feature,
> > +						       bool has_feature)
> > +{
> > +	if (entry)
> > +		cpuid_entry_change(entry, x86_feature, has_feature);
> > +
> > +	if (vcpu)
> > +		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> > +}
> > +
> >  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> >  				       int nent)
> >  {
> >  	struct kvm_cpuid_entry2 *best;
> > +	struct kvm_vcpu *caps = vcpu;
> > +
> > +	/*
> > +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > +	 * are coming in from userspace!
> > +	 */
> > +	if (entries != vcpu->arch.cpuid_entries)
> > +		caps = NULL;
> 
> I think that this should be decided by the caller. Just a boolean will suffice.

kvm_set_cpuid() calls this function only to validate/adjust the temporary
"entries" variable. While kvm_update_cpuid_runtime() calls it to do system
level changes.

So I kind of agree to make the caller fully awared, how about adding a
newly named wrapper for kvm_set_cpuid(), like:


  static void kvm_adjust_cpuid_entry(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
				     int nent)

  {
	WARN_ON(entries == vcpu->arch.cpuid_entries);
	__kvm_update_cpuid_runtime(vcpu, entries, nent);
  }

> 
> Or even better: since the userspace CPUID update is really not important in terms of performance,
> why to special case it? 
> 
> Even if these guest caps are later overwritten, I don't see why we
> need to avoid updating them, and in fact introduce a small risk of them not being consistent

IIUC, for kvm_set_cpuid() case, KVM may then fail the userspace cpuid setting,
so we can't change guest caps at this phase.

Thanks,
Yilun

> with the other cpu caps.
> 
> With this we can avoid having the 'cap' variable which is *very* confusing as well.
> 
> 
> >  
> >  	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > -	if (best) {
> > -		/* Update OSXSAVE bit */
> > -		if (boot_cpu_has(X86_FEATURE_XSAVE))
> > -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > +
> > +	if (boot_cpu_has(X86_FEATURE_XSAVE))
> > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> >  					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> >  
> > -		cpuid_entry_change(best, X86_FEATURE_APIC,
> > -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> >  
> > -		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > -			cpuid_entry_change(best, X86_FEATURE_MWAIT,
> > -					   vcpu->arch.ia32_misc_enable_msr &
> > -					   MSR_IA32_MISC_ENABLE_MWAIT);
> > -	}
> > +	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> > +					   vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
> >  
> >  	best = cpuid_entry2_find(entries, nent, 7, 0);
> > -	if (best && boot_cpu_has(X86_FEATURE_PKU))
> > -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> > -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > +	if (boot_cpu_has(X86_FEATURE_PKU))
> > +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> > +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> >  
> >  	best = cpuid_entry2_find(entries, nent, 0xD, 0);
> >  	if (best)
> > @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> >  	 * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> >  	 * honor userspace's definition for features that don't require KVM or
> >  	 * hardware management/support (or that KVM simply doesn't care about).
> > +	 *
> > +	 * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> > +	 * will also correctly set runtime features in guest CPU capabilities.
> >  	 */
> >  	for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> >  		const struct cpuid_reg cpuid = reverse_cpuid[i];
> 
> 
> Best regards,
> 	Maxim Levitsky
> 
>
Sean Christopherson Nov. 28, 2023, 12:43 a.m. UTC | #10
On Fri, Nov 24, 2023, Xu Yilun wrote:
> On Sun, Nov 19, 2023 at 07:35:30PM +0200, Maxim Levitsky wrote:
> > On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> > >  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > >  				       int nent)
> > >  {
> > >  	struct kvm_cpuid_entry2 *best;
> > > +	struct kvm_vcpu *caps = vcpu;
> > > +
> > > +	/*
> > > +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > > +	 * are coming in from userspace!
> > > +	 */
> > > +	if (entries != vcpu->arch.cpuid_entries)
> > > +		caps = NULL;
> > 
> > I think that this should be decided by the caller. Just a boolean will suffice.

I strongly disagree.  The _only_ time the caps should be updated is if
entries == vcpu->arch.cpuid_entries, and if entries == cpuid_entires than the caps
should _always_ be updated.

> kvm_set_cpuid() calls this function only to validate/adjust the temporary
> "entries" variable. While kvm_update_cpuid_runtime() calls it to do system
> level changes.
> 
> So I kind of agree to make the caller fully awared, how about adding a
> newly named wrapper for kvm_set_cpuid(), like:
> 
> 
>   static void kvm_adjust_cpuid_entry(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> 				     int nent)
> 
>   {
> 	WARN_ON(entries == vcpu->arch.cpuid_entries);
> 	__kvm_update_cpuid_runtime(vcpu, entries, nent);

But taking it a step further, we end up with

	WARN_ON_ONCE(update_caps != (entries == vcpu->arch.cpuid_entries));

which is silly since any bugs that would result in the WARN firing can be avoided
by doing:

	update_caps = entries == vcpu->arch.cpuid_entries;

which eventually distils down to the code I posted.

> > Or even better: since the userspace CPUID update is really not important in
> > terms of performance, why to special case it? 
> > 
> > Even if these guest caps are later overwritten, I don't see why we need to
> > avoid updating them, and in fact introduce a small risk of them not being
> > consistent
> 
> IIUC, for kvm_set_cpuid() case, KVM may then fail the userspace cpuid setting,
> so we can't change guest caps at this phase.

> Or even better: since the userspace CPUID update is really not important in
> terms of performance, why to special case it? 

Yep, and sadly __kvm_update_cpuid_runtime() *must* be invoked before kvm_set_cpuid()
is guaranteed to succeed because the whole point is to massage guest CPUID before
checking for divergences.

> > With this we can avoid having the 'cap' variable which is *very* confusing as well.

I agree the "caps" variable is confusing, but it's the least awful option I see.
The alternatives I can think of are:

  1. Update a dummy caps array
  2. Take a snapshot of the caps and restore them
  3. Have separate paths for updated guest CPUID versus guest caps

#1 would require passing a "u32 *" to guest_cpu_cap_change() (or an equivalent),
which I really, really don't want to do.  It' also a waste of cycles, and I'm
skeptical that it would be any less confusing than the proposed code.

#2 increases the complexity of kvm_set_cpuid() by introducing recovery paths, i.e.
adds more things that can break, and again is wasteful (though copying ~100 bytes
or so in a slow path isn't a big deal).

#3 would create unnecessary maintenance burden as we'd have to ensure any changes
hit both paths.
Xu Yilun Nov. 28, 2023, 5:13 a.m. UTC | #11
On Mon, Nov 27, 2023 at 04:43:45PM -0800, Sean Christopherson wrote:
> On Fri, Nov 24, 2023, Xu Yilun wrote:
> > On Sun, Nov 19, 2023 at 07:35:30PM +0200, Maxim Levitsky wrote:
> > > On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> > > >  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > > >  				       int nent)
> > > >  {
> > > >  	struct kvm_cpuid_entry2 *best;
> > > > +	struct kvm_vcpu *caps = vcpu;
> > > > +
> > > > +	/*
> > > > +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > > > +	 * are coming in from userspace!
> > > > +	 */
> > > > +	if (entries != vcpu->arch.cpuid_entries)
> > > > +		caps = NULL;
> > > 
> > > I think that this should be decided by the caller. Just a boolean will suffice.
> 
> I strongly disagree.  The _only_ time the caps should be updated is if
> entries == vcpu->arch.cpuid_entries, and if entries == cpuid_entires than the caps
> should _always_ be updated.
> 
> > kvm_set_cpuid() calls this function only to validate/adjust the temporary
> > "entries" variable. While kvm_update_cpuid_runtime() calls it to do system
> > level changes.
> > 
> > So I kind of agree to make the caller fully awared, how about adding a
> > newly named wrapper for kvm_set_cpuid(), like:
> > 
> > 
> >   static void kvm_adjust_cpuid_entry(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > 				     int nent)
> > 
> >   {
> > 	WARN_ON(entries == vcpu->arch.cpuid_entries);
> > 	__kvm_update_cpuid_runtime(vcpu, entries, nent);
> 
> But taking it a step further, we end up with
> 
> 	WARN_ON_ONCE(update_caps != (entries == vcpu->arch.cpuid_entries));
> 
> which is silly since any bugs that would result in the WARN firing can be avoided
> by doing:
> 
> 	update_caps = entries == vcpu->arch.cpuid_entries;
> 
> which eventually distils down to the code I posted.

OK, I agree with you.

My initial idea is to make developers easier to recognize what is
happening by name, without looking into __kvm_update_cpuid_runtime().
But it seems causing other subtle confuse and wastes cycles. Maybe the
comment is already good enough for developers.

Thanks,
Yilun
diff mbox series

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 36bd04030989..37a991439fe6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -262,31 +262,48 @@  static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
 	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
 }
 
+static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+						       struct kvm_cpuid_entry2 *entry,
+						       unsigned int x86_feature,
+						       bool has_feature)
+{
+	if (entry)
+		cpuid_entry_change(entry, x86_feature, has_feature);
+
+	if (vcpu)
+		guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+}
+
 static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
 				       int nent)
 {
 	struct kvm_cpuid_entry2 *best;
+	struct kvm_vcpu *caps = vcpu;
+
+	/*
+	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
+	 * are coming in from userspace!
+	 */
+	if (entries != vcpu->arch.cpuid_entries)
+		caps = NULL;
 
 	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
-	if (best) {
-		/* Update OSXSAVE bit */
-		if (boot_cpu_has(X86_FEATURE_XSAVE))
-			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
+
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
+		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
 					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
 
-		cpuid_entry_change(best, X86_FEATURE_APIC,
-			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
+				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
 
-		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
-			cpuid_entry_change(best, X86_FEATURE_MWAIT,
-					   vcpu->arch.ia32_misc_enable_msr &
-					   MSR_IA32_MISC_ENABLE_MWAIT);
-	}
+	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
+		kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
+					   vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
 
 	best = cpuid_entry2_find(entries, nent, 7, 0);
-	if (best && boot_cpu_has(X86_FEATURE_PKU))
-		cpuid_entry_change(best, X86_FEATURE_OSPKE,
-				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+	if (boot_cpu_has(X86_FEATURE_PKU))
+		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
+					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
 
 	best = cpuid_entry2_find(entries, nent, 0xD, 0);
 	if (best)
@@ -353,6 +370,9 @@  static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * Reset guest capabilities to userspace's guest CPUID definition, i.e.
 	 * honor userspace's definition for features that don't require KVM or
 	 * hardware management/support (or that KVM simply doesn't care about).
+	 *
+	 * Note, KVM has already done runtime updates on guest CPUID, i.e. this
+	 * will also correctly set runtime features in guest CPU capabilities.
 	 */
 	for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
 		const struct cpuid_reg cpuid = reverse_cpuid[i];