diff mbox series

KVM: x86/xcr0: Don't make XFEATURE_MASK_SSE a mandatory bit setting

Message ID 20220119070427.33801-1-likexu@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/xcr0: Don't make XFEATURE_MASK_SSE a mandatory bit setting | expand

Commit Message

Like Xu Jan. 19, 2022, 7:04 a.m. UTC
From: Like Xu <likexu@tencent.com>

If "bits 1 and 2 of XCR0" are cleared and set only "bit 0 of XCR0",
the x86 hardware does work properly and the CPUID leaf of the states
size will be updated as follows:

xsetbv(XCR_XFEATURE_ENABLED_MASK, 0x2e7);
  CPUID[0d, 00]: eax=000002e7 ebx=00000a88 ecx=00000a88 edx=00000000
xsetbv(XCR_XFEATURE_ENABLED_MASK, 0x1);
  CPUID[0d, 00]: eax=000002e7 ebx=00000240 ecx=00000a88 edx=00000000

According to Intel SDM "XSETBV - Set Extended Control Register",
the truth emerges: only the "bit 0 of XCR0 (corresponding to x87 state)
must be set to 1", and in addition, it's necessary to set both bits
(XCR0[1] and XCR0[2]) to use AVX instructions".

Therefore, it's pretty safe for KVM to run a user-defined guest w/o
using AVX instructions and in that case, the kvm_update_cpuid_runtime()
cannot work as expected due to the wrong macro definition.

Remove the XFEATURE_MASK_SSE bit as part of the XFEATURE_MASK_EXTEND
and opportunistically, move it into the context of its unique user KVM.

Fixes: 56c103ec040b ("KVM: x86: Fix xsave cpuid exposing bug")
Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/include/asm/fpu/xstate.h | 3 ---
 arch/x86/kvm/cpuid.h              | 8 ++++++++
 2 files changed, 8 insertions(+), 3 deletions(-)

Comments

Dave Hansen Jan. 19, 2022, 3:23 p.m. UTC | #1
On 1/18/22 11:04 PM, Like Xu wrote:
> Remove the XFEATURE_MASK_SSE bit as part of the XFEATURE_MASK_EXTEND
> and opportunistically, move it into the context of its unique user KVM.

Is this a problem for xstate_required_size()?  The rules for the CPUID
sub-functions <=1 are different than those for >1.  Most importantly,
'eax' doesn't enumerate the size of the feature for the XFEATURE_SSE
sub-leaf.

I think XFEATURE_MASK_EXTEND was being used to avoid that oddity:

> u32 xstate_required_size(u64 xstate_bv, bool compacted)
> {
>         int feature_bit = 0;
>         u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
> 
>         xstate_bv &= XFEATURE_MASK_EXTEND;
>         while (xstate_bv) {
>                 if (xstate_bv & 0x1) {
>                         u32 eax, ebx, ecx, edx, offset;
>                         cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
>                         /* ECX[1]: 64B alignment in compacted form */
>                         if (compacted)
>                                 offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret;
>                         else
>                                 offset = ebx;
>                         ret = max(ret, offset + eax);
>                 }
> 
>                 xstate_bv >>= 1;
>                 feature_bit++;
>         }
> 
>         return ret;
> }
Like Xu Jan. 20, 2022, 12:01 p.m. UTC | #2
On 19/1/2022 11:23 pm, Dave Hansen wrote:
> On 1/18/22 11:04 PM, Like Xu wrote:
>> Remove the XFEATURE_MASK_SSE bit as part of the XFEATURE_MASK_EXTEND
>> and opportunistically, move it into the context of its unique user KVM.
> 
> Is this a problem for xstate_required_size()?  The rules for the CPUID
> sub-functions <=1 are different than those for >1.  Most importantly,
> 'eax' doesn't enumerate the size of the feature for the XFEATURE_SSE
> sub-leaf.

Indeed.

> 
> I think XFEATURE_MASK_EXTEND was being used to avoid that oddity:

It seems that the cpuid.0xd.0.ebx size update for the SSE+AVX state needs
to be triggered by setting bit 2 which is quite odd:

  XCR0 = 001B, ebx=00000240
  XCR0 = 011B, ebx=00000240
  XCR0 = 111B, ebx=00000340

Thank you and sorry for the noise.

> 
>> u32 xstate_required_size(u64 xstate_bv, bool compacted)
>> {
>>          int feature_bit = 0;
>>          u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
>>
>>          xstate_bv &= XFEATURE_MASK_EXTEND;
>>          while (xstate_bv) {
>>                  if (xstate_bv & 0x1) {
>>                          u32 eax, ebx, ecx, edx, offset;
>>                          cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
>>                          /* ECX[1]: 64B alignment in compacted form */
>>                          if (compacted)
>>                                  offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret;
>>                          else
>>                                  offset = ebx;
>>                          ret = max(ret, offset + eax);
>>                  }
>>
>>                  xstate_bv >>= 1;
>>                  feature_bit++;
>>          }
>>
>>          return ret;
>> }
diff mbox series

Patch

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index cd3dd170e23a..7d331a4aa2e2 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -9,9 +9,6 @@ 
 #include <asm/fpu/api.h>
 #include <asm/user.h>
 
-/* Bit 63 of XCR0 is reserved for future expansion */
-#define XFEATURE_MASK_EXTEND	(~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
-
 #define XSTATE_CPUID		0x0000000d
 
 #define TILE_CPUID		0x0000001d
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 8a770b481d9d..5e36157febb2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -8,6 +8,14 @@ 
 #include <asm/processor.h>
 #include <uapi/asm/kvm_para.h>
 
+/*
+ * Bit 63 of XCR0 is reserved for future expansion,
+ * and will not represent a processor state component.
+ *
+ * Only bit 0 of XCR0 (x87 state) must be set to 1.
+ */
+#define XFEATURE_MASK_EXTEND	(~(XFEATURE_MASK_FP | (1ULL << 63)))
+
 extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
 void kvm_set_cpu_caps(void);