diff mbox

[v5] i386: Introduce ARAT CPU feature

Message ID 55740B9C.6030107@web.de (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Kiszka June 7, 2015, 9:15 a.m. UTC
From: Jan Kiszka <jan.kiszka@siemens.com>

ARAT signals that the APIC timer does not stop in power saving states.
As our APICs are emulated, it's fine to expose this feature to guests,
at least when asking for KVM host features or with CPU types that
include the flag. The exact model number that introduced the feature is
not known, but reports can be found that it's at least available since
Sandy Bridge.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

Changes in v5:
 - rebased over master

 include/hw/i386/pc.h |  7 ++++++-
 target-i386/cpu.c    | 33 ++++++++++++++++++++++++++++++++-
 target-i386/cpu.h    |  3 +++
 target-i386/kvm.c    |  2 ++
 4 files changed, 43 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini June 8, 2015, 3:40 p.m. UTC | #1
On 07/06/2015 11:15, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> ARAT signals that the APIC timer does not stop in power saving states.
> As our APICs are emulated, it's fine to expose this feature to guests,
> at least when asking for KVM host features or with CPU types that
> include the flag. The exact model number that introduced the feature is
> not known, but reports can be found that it's at least available since
> Sandy Bridge.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
> 
> Changes in v5:
>  - rebased over master
> 
>  include/hw/i386/pc.h |  7 ++++++-
>  target-i386/cpu.c    | 33 ++++++++++++++++++++++++++++++++-
>  target-i386/cpu.h    |  3 +++
>  target-i386/kvm.c    |  2 ++
>  4 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index bec6de1..3b0b30f 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -294,7 +294,12 @@ int e820_get_num_entries(void);
>  bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
>  
>  #define PC_COMPAT_2_3 \
> -        HW_COMPAT_2_3
> +        HW_COMPAT_2_3 \
> +        {\
> +            .driver   = TYPE_X86_CPU,\
> +            .property = "arat",\
> +            .value    = "off",\
> +        },
>  
>  #define PC_COMPAT_2_2 \
>          PC_COMPAT_2_3 \
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 99ad551..b5b9fc2 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -284,6 +284,17 @@ static const char *cpuid_xsave_feature_name[] = {
>      NULL, NULL, NULL, NULL,
>  };
>  
> +static const char *cpuid_6_feature_name[] = {
> +    NULL, NULL, "arat", NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +};
> +
>  #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
>  #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
>            CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
> @@ -339,6 +350,7 @@ static const char *cpuid_xsave_feature_name[] = {
>            CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
>            CPUID_7_0_EBX_RDSEED */
>  #define TCG_APM_FEATURES 0
> +#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
>  
>  
>  typedef struct FeatureWordInfo {
> @@ -408,6 +420,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>          .cpuid_reg = R_EAX,
>          .tcg_features = 0,
>      },
> +    [FEAT_6_EAX] = {
> +        .feat_names = cpuid_6_feature_name,
> +        .cpuid_eax = 6, .cpuid_reg = R_EAX,
> +        .tcg_features = TCG_6_EAX_FEATURES,
> +    },
>  };
>  
>  typedef struct X86RegisterInfo32 {
> @@ -1001,6 +1018,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
>          .features[FEAT_8000_0001_ECX] =
>              CPUID_EXT3_LAHF_LM,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)",
>      },
> @@ -1030,6 +1049,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_EXT3_LAHF_LM,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Xeon E312xx (Sandy Bridge)",
>      },
> @@ -1062,6 +1083,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_EXT3_LAHF_LM,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)",
>      },
> @@ -1096,6 +1119,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Haswell, no TSX)",
>      },    {
> @@ -1130,6 +1155,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_RTM,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Haswell)",
>      },
> @@ -1166,6 +1193,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_SMAP,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Broadwell, no TSX)",
>      },
> @@ -1202,6 +1231,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_SMAP,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Broadwell)",
>      },
> @@ -2358,7 +2389,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          break;
>      case 6:
>          /* Thermal and Power Leaf */
> -        *eax = 0;
> +        *eax = env->features[FEAT_6_EAX];
>          *ebx = 0;
>          *ecx = 0;
>          *edx = 0;
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 26182bd..074a9c9 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -412,6 +412,7 @@ typedef enum FeatureWord {
>      FEAT_KVM,           /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
>      FEAT_SVM,           /* CPUID[8000_000A].EDX */
>      FEAT_XSAVE,         /* CPUID[EAX=0xd,ECX=1].EAX */
> +    FEAT_6_EAX,         /* CPUID[6].EAX */
>      FEATURE_WORDS,
>  } FeatureWord;
>  
> @@ -577,6 +578,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_XSAVE_XGETBV1    (1U << 2)
>  #define CPUID_XSAVE_XSAVES     (1U << 3)
>  
> +#define CPUID_6_EAX_ARAT       (1U << 2)
> +
>  /* CPUID[0x80000007].EDX flags: */
>  #define CPUID_APM_INVTSC       (1U << 8)
>  
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index ca2da84..d7bd205 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -233,6 +233,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>          if (!kvm_irqchip_in_kernel()) {
>              ret &= ~CPUID_EXT_X2APIC;
>          }
> +    } else if (function == 6 && reg == R_EAX) {
> +        ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
>      } else if (function == 0x80000001 && reg == R_EDX) {
>          /* On Intel, kvm returns cpuid according to the Intel spec,
>           * so add missing bits according to the AMD spec:
> 

Acked-by: Paolo Bonzini <pbonzini@redhat.com>

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin June 8, 2015, 3:50 p.m. UTC | #2
On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> ARAT signals that the APIC timer does not stop in power saving states.
> As our APICs are emulated, it's fine to expose this feature to guests,
> at least when asking for KVM host features or with CPU types that
> include the flag. The exact model number that introduced the feature is
> not known, but reports can be found that it's at least available since
> Sandy Bridge.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

For the pc.h change:

Acked-by: Michael S. Tsirkin <mst@redhat.com>


> ---
> 
> Changes in v5:
>  - rebased over master
> 
>  include/hw/i386/pc.h |  7 ++++++-
>  target-i386/cpu.c    | 33 ++++++++++++++++++++++++++++++++-
>  target-i386/cpu.h    |  3 +++
>  target-i386/kvm.c    |  2 ++
>  4 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index bec6de1..3b0b30f 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -294,7 +294,12 @@ int e820_get_num_entries(void);
>  bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
>  
>  #define PC_COMPAT_2_3 \
> -        HW_COMPAT_2_3
> +        HW_COMPAT_2_3 \
> +        {\
> +            .driver   = TYPE_X86_CPU,\
> +            .property = "arat",\
> +            .value    = "off",\
> +        },
>  
>  #define PC_COMPAT_2_2 \
>          PC_COMPAT_2_3 \
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 99ad551..b5b9fc2 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -284,6 +284,17 @@ static const char *cpuid_xsave_feature_name[] = {
>      NULL, NULL, NULL, NULL,
>  };
>  
> +static const char *cpuid_6_feature_name[] = {
> +    NULL, NULL, "arat", NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +};
> +
>  #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
>  #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
>            CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
> @@ -339,6 +350,7 @@ static const char *cpuid_xsave_feature_name[] = {
>            CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
>            CPUID_7_0_EBX_RDSEED */
>  #define TCG_APM_FEATURES 0
> +#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
>  
>  
>  typedef struct FeatureWordInfo {
> @@ -408,6 +420,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>          .cpuid_reg = R_EAX,
>          .tcg_features = 0,
>      },
> +    [FEAT_6_EAX] = {
> +        .feat_names = cpuid_6_feature_name,
> +        .cpuid_eax = 6, .cpuid_reg = R_EAX,
> +        .tcg_features = TCG_6_EAX_FEATURES,
> +    },
>  };
>  
>  typedef struct X86RegisterInfo32 {
> @@ -1001,6 +1018,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
>          .features[FEAT_8000_0001_ECX] =
>              CPUID_EXT3_LAHF_LM,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)",
>      },
> @@ -1030,6 +1049,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_EXT3_LAHF_LM,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Xeon E312xx (Sandy Bridge)",
>      },
> @@ -1062,6 +1083,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_EXT3_LAHF_LM,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)",
>      },
> @@ -1096,6 +1119,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Haswell, no TSX)",
>      },    {
> @@ -1130,6 +1155,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_RTM,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Haswell)",
>      },
> @@ -1166,6 +1193,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_SMAP,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Broadwell, no TSX)",
>      },
> @@ -1202,6 +1231,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
>              CPUID_7_0_EBX_SMAP,
>          .features[FEAT_XSAVE] =
>              CPUID_XSAVE_XSAVEOPT,
> +        .features[FEAT_6_EAX] =
> +            CPUID_6_EAX_ARAT,
>          .xlevel = 0x8000000A,
>          .model_id = "Intel Core Processor (Broadwell)",
>      },
> @@ -2358,7 +2389,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          break;
>      case 6:
>          /* Thermal and Power Leaf */
> -        *eax = 0;
> +        *eax = env->features[FEAT_6_EAX];
>          *ebx = 0;
>          *ecx = 0;
>          *edx = 0;
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 26182bd..074a9c9 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -412,6 +412,7 @@ typedef enum FeatureWord {
>      FEAT_KVM,           /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
>      FEAT_SVM,           /* CPUID[8000_000A].EDX */
>      FEAT_XSAVE,         /* CPUID[EAX=0xd,ECX=1].EAX */
> +    FEAT_6_EAX,         /* CPUID[6].EAX */
>      FEATURE_WORDS,
>  } FeatureWord;
>  
> @@ -577,6 +578,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_XSAVE_XGETBV1    (1U << 2)
>  #define CPUID_XSAVE_XSAVES     (1U << 3)
>  
> +#define CPUID_6_EAX_ARAT       (1U << 2)
> +
>  /* CPUID[0x80000007].EDX flags: */
>  #define CPUID_APM_INVTSC       (1U << 8)
>  
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index ca2da84..d7bd205 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -233,6 +233,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>          if (!kvm_irqchip_in_kernel()) {
>              ret &= ~CPUID_EXT_X2APIC;
>          }
> +    } else if (function == 6 && reg == R_EAX) {
> +        ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
>      } else if (function == 0x80000001 && reg == R_EDX) {
>          /* On Intel, kvm returns cpuid according to the Intel spec,
>           * so add missing bits according to the AMD spec:
> -- 
> 2.1.4
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost June 18, 2015, 8:21 p.m. UTC | #3
On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> ARAT signals that the APIC timer does not stop in power saving states.
> As our APICs are emulated, it's fine to expose this feature to guests,
> at least when asking for KVM host features or with CPU types that
> include the flag. The exact model number that introduced the feature is
> not known, but reports can be found that it's at least available since
> Sandy Bridge.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

The code looks good now, but: what are the real consequences of
enabling/disabling the flag? What exactly guests use it for?

Isn't this going to make guests have additional expectations about the
APIC timer that may be broken when live-migrating or pausing the VM?
Jan Kiszka June 21, 2015, 5:38 p.m. UTC | #4
On 2015-06-18 22:21, Eduardo Habkost wrote:
> On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> ARAT signals that the APIC timer does not stop in power saving states.
>> As our APICs are emulated, it's fine to expose this feature to guests,
>> at least when asking for KVM host features or with CPU types that
>> include the flag. The exact model number that introduced the feature is
>> not known, but reports can be found that it's at least available since
>> Sandy Bridge.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> 
> The code looks good now, but: what are the real consequences of
> enabling/disabling the flag? What exactly guests use it for?
> 
> Isn't this going to make guests have additional expectations about the
> APIC timer that may be broken when live-migrating or pausing the VM?

ARAT only refers to stopping of the timer in certain power states (which
we do not even emulate IIRC). In that case, the OS is under risk of
sleeping forever, thus need to look for a different wakeup source.
Live-migration or VM pausing are external effects on all timers of the
guest, not only the APIC. However, none of them cause a wakeup miss -
provided the host decides to resume the guest eventually.

Jan
Wanpeng Li June 23, 2015, 2:50 a.m. UTC | #5
On 6/22/15 1:38 AM, Jan Kiszka wrote:
> On 2015-06-18 22:21, Eduardo Habkost wrote:
>> On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> ARAT signals that the APIC timer does not stop in power saving states.
>>> As our APICs are emulated, it's fine to expose this feature to guests,
>>> at least when asking for KVM host features or with CPU types that
>>> include the flag. The exact model number that introduced the feature is
>>> not known, but reports can be found that it's at least available since
>>> Sandy Bridge.
>>>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> The code looks good now, but: what are the real consequences of
>> enabling/disabling the flag? What exactly guests use it for?
>>
>> Isn't this going to make guests have additional expectations about the
>> APIC timer that may be broken when live-migrating or pausing the VM?
> ARAT only refers to stopping of the timer in certain power states (which
> we do not even emulate IIRC). In that case, the OS is under risk of
> sleeping forever, thus need to look for a different wakeup source.

HPET will always be the default broadcast event device I think.

Regards,
Wanpeng Li

> Live-migration or VM pausing are external effects on all timers of the
> guest, not only the APIC. However, none of them cause a wakeup miss -
> provided the host decides to resume the guest eventually.
>
> Jan
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
Jan Kiszka June 23, 2015, 5:04 a.m. UTC | #6
On 2015-06-23 04:50, Wanpeng Li wrote:
> 
> 
> On 6/22/15 1:38 AM, Jan Kiszka wrote:
>> On 2015-06-18 22:21, Eduardo Habkost wrote:
>>> On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> ARAT signals that the APIC timer does not stop in power saving states.
>>>> As our APICs are emulated, it's fine to expose this feature to guests,
>>>> at least when asking for KVM host features or with CPU types that
>>>> include the flag. The exact model number that introduced the feature is
>>>> not known, but reports can be found that it's at least available since
>>>> Sandy Bridge.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> The code looks good now, but: what are the real consequences of
>>> enabling/disabling the flag? What exactly guests use it for?
>>>
>>> Isn't this going to make guests have additional expectations about the
>>> APIC timer that may be broken when live-migrating or pausing the VM?
>> ARAT only refers to stopping of the timer in certain power states (which
>> we do not even emulate IIRC). In that case, the OS is under risk of
>> sleeping forever, thus need to look for a different wakeup source.
> 
> HPET will always be the default broadcast event device I think.

But it's unused (under Linux) if per-cpu clockevents are unaffected by
CLOCK_EVT_FEAT_C3STOP (x86-only "none-feature"), i.e. have ARAT set. And
other guests may have other strategies to deal with missing ARAT.

Again, the scenario for me was not a regular setup but some Jailhouse
boot of Linux where neither a HPET nor a PIT are available as broadcast
sources and Linux therefore refuses to switch to hires mode - in
contrast to running on real hardware.

Jan
Eduardo Habkost June 25, 2015, 6:23 p.m. UTC | #7
On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> ARAT signals that the APIC timer does not stop in power saving states.
> As our APICs are emulated, it's fine to expose this feature to guests,
> at least when asking for KVM host features or with CPU types that
> include the flag. The exact model number that introduced the feature is
> not known, but reports can be found that it's at least available since
> Sandy Bridge.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>

Applied to the x86 tree. Thanks!
diff mbox

Patch

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index bec6de1..3b0b30f 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -294,7 +294,12 @@  int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_3 \
-        HW_COMPAT_2_3
+        HW_COMPAT_2_3 \
+        {\
+            .driver   = TYPE_X86_CPU,\
+            .property = "arat",\
+            .value    = "off",\
+        },
 
 #define PC_COMPAT_2_2 \
         PC_COMPAT_2_3 \
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 99ad551..b5b9fc2 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -284,6 +284,17 @@  static const char *cpuid_xsave_feature_name[] = {
     NULL, NULL, NULL, NULL,
 };
 
+static const char *cpuid_6_feature_name[] = {
+    NULL, NULL, "arat", NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+};
+
 #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
 #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
           CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
@@ -339,6 +350,7 @@  static const char *cpuid_xsave_feature_name[] = {
           CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
           CPUID_7_0_EBX_RDSEED */
 #define TCG_APM_FEATURES 0
+#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
 
 
 typedef struct FeatureWordInfo {
@@ -408,6 +420,11 @@  static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .cpuid_reg = R_EAX,
         .tcg_features = 0,
     },
+    [FEAT_6_EAX] = {
+        .feat_names = cpuid_6_feature_name,
+        .cpuid_eax = 6, .cpuid_reg = R_EAX,
+        .tcg_features = TCG_6_EAX_FEATURES,
+    },
 };
 
 typedef struct X86RegisterInfo32 {
@@ -1001,6 +1018,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         .features[FEAT_8000_0001_ECX] =
             CPUID_EXT3_LAHF_LM,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)",
     },
@@ -1030,6 +1049,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT3_LAHF_LM,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Xeon E312xx (Sandy Bridge)",
     },
@@ -1062,6 +1083,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT3_LAHF_LM,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)",
     },
@@ -1096,6 +1119,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Haswell, no TSX)",
     },    {
@@ -1130,6 +1155,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_7_0_EBX_RTM,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Haswell)",
     },
@@ -1166,6 +1193,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_7_0_EBX_SMAP,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Broadwell, no TSX)",
     },
@@ -1202,6 +1231,8 @@  static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_7_0_EBX_SMAP,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Broadwell)",
     },
@@ -2358,7 +2389,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 6:
         /* Thermal and Power Leaf */
-        *eax = 0;
+        *eax = env->features[FEAT_6_EAX];
         *ebx = 0;
         *ecx = 0;
         *edx = 0;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 26182bd..074a9c9 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -412,6 +412,7 @@  typedef enum FeatureWord {
     FEAT_KVM,           /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
     FEAT_SVM,           /* CPUID[8000_000A].EDX */
     FEAT_XSAVE,         /* CPUID[EAX=0xd,ECX=1].EAX */
+    FEAT_6_EAX,         /* CPUID[6].EAX */
     FEATURE_WORDS,
 } FeatureWord;
 
@@ -577,6 +578,8 @@  typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_XSAVE_XGETBV1    (1U << 2)
 #define CPUID_XSAVE_XSAVES     (1U << 3)
 
+#define CPUID_6_EAX_ARAT       (1U << 2)
+
 /* CPUID[0x80000007].EDX flags: */
 #define CPUID_APM_INVTSC       (1U << 8)
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ca2da84..d7bd205 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -233,6 +233,8 @@  uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         if (!kvm_irqchip_in_kernel()) {
             ret &= ~CPUID_EXT_X2APIC;
         }
+    } else if (function == 6 && reg == R_EAX) {
+        ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
     } else if (function == 0x80000001 && reg == R_EDX) {
         /* On Intel, kvm returns cpuid according to the Intel spec,
          * so add missing bits according to the AMD spec: