[03/10] x86/cpuid: Handle leaf 0x1 in guest_cpuid()

Message ID 1487588434-4359-4-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived

Commit Message

Andrew Cooper Feb. 20, 2017, 11 a.m. UTC
The feature words, ecx and edx, are already audited as part of the featureset
logic.  The existing leaf 0x80000001 dynamic logic has its SYSCALL adjustment
split out, as the rest of the adjustments are common with leaf 0x1.  The
existing leaf 0x1 feature adjustments from {pv,hvm}_cpuid() are moved
wholesale into guest_cpuid(), although deduped against the common adjustments.

The eax word is family/model/stepping information, and is fine to use as
provided by the toolstack, although with reserved bits cleared.

The ebx word is more problematic.  The low 8 bits are the brand ID and safe to
pass straight through.  The next 8 bits are the CLFLUSH line size.  This value
is forwarded straight from hardware, as nothing good can possibly come of
providing an alternative value to the guest.

The next 8 bits are slightly different between Intel and AMD, but are both
some property of the number of logical cores in the current physical package.
For now, the toolstack value is used unchanged until better topology support
is available.

The final 8 bits are the initial legacy APIC ID.  For HVM guests, this was
overridden to vcpu_id * 2.  The same logic is now applied to PV guests, so
guests don't observe a constant number on all vcpus via their emulated or
faulted view.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Boris Ostrovsky <boris.ostrovsky@oracle.com>

Boris: This conflicts textually but not functionally with your vPMU
adjustments.  Whichever way round we end up needing to rebase should be easy.
---
 xen/arch/x86/cpuid.c        | 351 +++++++++++++++++++-------------------------
 xen/include/asm-x86/cpuid.h |   6 +-
 2 files changed, 158 insertions(+), 199 deletions(-)

Comments

Jan Beulich Feb. 21, 2017, 4:59 p.m. UTC | #1
>>> On 20.02.17 at 12:00, <andrew.cooper3@citrix.com> wrote:
> The ebx word is more problematic.  The low 8 bits are the brand ID and safe to
> pass straight through.  The next 8 bits are the CLFLUSH line size.  This value
> is forwarded straight from hardware, as nothing good can possibly come of
> providing an alternative value to the guest.

This risks the value guests see changing across migration.

> The final 8 bits are the initial legacy APIC ID.  For HVM guests, this was
> overridden to vcpu_id * 2.  The same logic is now applied to PV guests, so
> guests don't observe a constant number on all vcpus via their emulated or
> faulted view.

They won't be the same everywhere, but every 128th CPU will
share values. I'm therefore not sure it wouldn't be better to hand
out zero or all ones here.

> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -176,6 +176,9 @@ static void recalculate_misc(struct cpuid_policy *p)
>      switch ( p->x86_vendor )
>      {
>      case X86_VENDOR_INTEL:
> +        p->basic.raw_fms &= 0x0fff3fff;

Wouldn't it be better to use the same mask as for AMD here? The
field is effectively unused / reserved on 64-bit processors.

> @@ -967,6 +920,12 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
>          break;
>  
>      case 0x80000001:
> +        /* SYSCALL is hidden outside of long mode on Intel. */
> +        if ( p->x86_vendor == X86_VENDOR_INTEL &&
> +             has_hvm_container_domain(d) && !hvm_long_mode_enabled(v) )
> +            res->d &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
> +
> +    common_dynamic_adjustments: /* Adjustments common with leaf 1. */

Perhaps have "leaf1" in the label named then?

Jan
Andrew Cooper Feb. 21, 2017, 5:13 p.m. UTC | #2
On 21/02/17 16:59, Jan Beulich wrote:
>>>> On 20.02.17 at 12:00, <andrew.cooper3@citrix.com> wrote:
>> The ebx word is more problematic.  The low 8 bits are the brand ID and safe to
>> pass straight through.  The next 8 bits are the CLFLUSH line size.  This value
>> is forwarded straight from hardware, as nothing good can possibly come of
>> providing an alternative value to the guest.
> Risking the value guests see to change across migration.

CPUID state isn't sent in the migration stream.  It is still regenerated
from scratch on the destination side, thus has always (potentially) been
changing under the feet of the guest.

All this logic is doing is preventing the toolstack from offering
anything but the real hardware value.

With the planned extra toolstack and migration logic in place,
clflush_size being different will be a hard failure to migrate, as it is
not safe for the guest.

>
>> The final 8 bits are the initial legacy APIC ID.  For HVM guests, this was
>> overridden to vcpu_id * 2.  The same logic is now applied to PV guests, so
>> guests don't observe a constant number on all vcpus via their emulated or
>> faulted view.
> They won't be the same everywhere, but every 128th CPU will
> share values. I'm therefore not sure it wouldn't be better to hand
> out zero or all ones here.

There is no case where 128 cpus work sensibly under Xen ATM.

On real hardware, this APIC ID field does start repeating, so IMO it is
the correct action to take.

>
>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -176,6 +176,9 @@ static void recalculate_misc(struct cpuid_policy *p)
>>      switch ( p->x86_vendor )
>>      {
>>      case X86_VENDOR_INTEL:
>> +        p->basic.raw_fms &= 0x0fff3fff;
> Wouldn't it be better to use the same mask as for AMD here? The
> field is effectively unused / reserved on 64-bit processors.

Perhaps.

>
>> @@ -967,6 +920,12 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
>>          break;
>>  
>>      case 0x80000001:
>> +        /* SYSCALL is hidden outside of long mode on Intel. */
>> +        if ( p->x86_vendor == X86_VENDOR_INTEL &&
>> +             has_hvm_container_domain(d) && !hvm_long_mode_enabled(v) )
>> +            res->d &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
>> +
>> +    common_dynamic_adjustments: /* Adjustments common with leaf 1. */
> Perhaps have "leaf1" in the label named then?

Ok.

~Andrew
Jan Beulich Feb. 21, 2017, 5:20 p.m. UTC | #3
>>> On 21.02.17 at 18:13, <andrew.cooper3@citrix.com> wrote:
> On 21/02/17 16:59, Jan Beulich wrote:
>>>>> On 20.02.17 at 12:00, <andrew.cooper3@citrix.com> wrote:
>>> The ebx word is more problematic.  The low 8 bits are the brand ID and safe to
>>> pass straight through.  The next 8 bits are the CLFLUSH line size.  This value
>>> is forwarded straight from hardware, as nothing good can possibly come of
>>> providing an alternative value to the guest.
>> Risking the value guests see to change across migration.
> 
> CPUID state isn't sent in the migration stream.  It is still regenerated
> from scratch on the destination side, thus has always (potentially) been
> changing under the feet of the guest.

True.

>>> The final 8 bits are the initial legacy APIC ID.  For HVM guests, this was
>>> overridden to vcpu_id * 2.  The same logic is now applied to PV guests, so
>>> guests don't observe a constant number on all vcpus via their emulated or
>>> faulted view.
>> They won't be the same everywhere, but every 128th CPU will
>> share values. I'm therefore not sure it wouldn't be better to hand
>> out zero or all ones here.
> 
> There is no case where 128 cpus work sensibly under Xen ATM.

For HVM you mean. I'm sure I've seen > 128 vCPU PV guests
(namely Dom0-s).

> On real hardware, this APIC ID field does start repeating, so IMO it is
> the correct action to take.

Oh, right. That's fine then.

>>> --- a/xen/arch/x86/cpuid.c
>>> +++ b/xen/arch/x86/cpuid.c
>>> @@ -176,6 +176,9 @@ static void recalculate_misc(struct cpuid_policy *p)
>>>      switch ( p->x86_vendor )
>>>      {
>>>      case X86_VENDOR_INTEL:
>>> +        p->basic.raw_fms &= 0x0fff3fff;
>> Wouldn't it be better to use the same mask as for AMD here? The
>> field is effectively unused / reserved on 64-bit processors.
> 
> Perhaps.
> 
>>
>>> @@ -967,6 +920,12 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
>>>          break;
>>>  
>>>      case 0x80000001:
>>> +        /* SYSCALL is hidden outside of long mode on Intel. */
>>> +        if ( p->x86_vendor == X86_VENDOR_INTEL &&
>>> +             has_hvm_container_domain(d) && !hvm_long_mode_enabled(v) )
>>> +            res->d &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
>>> +
>>> +    common_dynamic_adjustments: /* Adjustments common with leaf 1. */
>> Perhaps have "leaf1" in the label named then?
> 
> Ok.

With those (last) two adjustments,
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Andrew Cooper Feb. 21, 2017, 5:29 p.m. UTC | #4
On 21/02/17 17:20, Jan Beulich wrote:
>
>>>> The final 8 bits are the initial legacy APIC ID.  For HVM guests, this was
>>>> overridden to vcpu_id * 2.  The same logic is now applied to PV guests, so
>>>> guests don't observe a constant number on all vcpus via their emulated or
>>>> faulted view.
>>> They won't be the same everywhere, but every 128th CPU will
>>> share values. I'm therefore not sure it wouldn't be better to hand
>>> out zero or all ones here.
>> There is no case where 128 cpus work sensibly under Xen ATM.
> For HVM you mean. I'm sure I've seen > 128 vCPU PV guests
> (namely Dom0-s).

You can physically create PV domains with up to 8192 vcpus.  I tried
this once.

The NMI watchdog (even set to 10s) is unforgiving of some of the
for_each_vcpu() loops during domain destruction.

I can also still create workloads in a 64-vcpu HVM guest which will cause
a 5 second watchdog timeout, which is why XenServer's upper supported
vcpu limit is still 32.

~Andrew
Jan Beulich Feb. 22, 2017, 7:16 a.m. UTC | #5
>>> On 21.02.17 at 18:29, <andrew.cooper3@citrix.com> wrote:
> On 21/02/17 17:20, Jan Beulich wrote:
>>
>>>>> The final 8 bits are the initial legacy APIC ID.  For HVM guests, this was
>>>>> overridden to vcpu_id * 2.  The same logic is now applied to PV guests, so
>>>>> guests don't observe a constant number on all vcpus via their emulated or
>>>>> faulted view.
>>>> They won't be the same everywhere, but every 128th CPU will
>>>> share values. I'm therefore not sure it wouldn't be better to hand
>>>> out zero or all ones here.
>>> There is no case where 128 cpus work sensibly under Xen ATM.
>> For HVM you mean. I'm sure I've seen > 128 vCPU PV guests
>> (namely Dom0-s).
> 
> You can physically create PV domains with up to 8192 vcpus.  I tried
> this once.
> 
> The NMI watchdog (even set to 10s) is unforgiving of some the
> for_each_vcpu() loops during domain destruction.
> 
> I can also still create workloads in a 64vcpu HVM guest which will cause
> a 5 second watchdog timeout, which is why XenServers upper supported
> vcpu limit is still 32.

Which does not contradict what I've said: I didn't claim 8k-vCPU
guests would work well, but I'm pretty convinced ones in the
range 128...512 have reasonable chances of working. And we
both know the situation sadly is worse for HVM ones.

Jan
Patch

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index e0a387e..3ecb794 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -176,6 +176,9 @@  static void recalculate_misc(struct cpuid_policy *p)
     switch ( p->x86_vendor )
     {
     case X86_VENDOR_INTEL:
+        p->basic.raw_fms &= 0x0fff3fff;
+        p->basic.apic_id = 0; /* Dynamic. */
+
         p->basic.l2_nr_queries = 1; /* Fixed to 1 query. */
         p->basic.raw[0x3] = EMPTY_LEAF; /* PSN - always hidden. */
         p->basic.raw[0x9] = EMPTY_LEAF; /* DCA - always hidden. */
@@ -194,6 +197,9 @@  static void recalculate_misc(struct cpuid_policy *p)
         break;
 
     case X86_VENDOR_AMD:
+        p->basic.raw_fms &= 0x0fff0fff;
+        p->basic.apic_id = 0; /* Dynamic. */
+
         zero_leaves(p->basic.raw, 0x2, 0x3);
         p->basic.raw[0x9] = EMPTY_LEAF;
 
@@ -502,6 +508,9 @@  void recalculate_cpuid_policy(struct domain *d)
 
     cpuid_featureset_to_policy(fs, p);
 
+    /* Pass host cacheline size through to guests. */
+    p->basic.clflush_size = max->basic.clflush_size;
+
     p->extd.maxphysaddr = min(p->extd.maxphysaddr, max->extd.maxphysaddr);
     p->extd.maxphysaddr = min_t(uint8_t, p->extd.maxphysaddr,
                                 d->arch.paging.gfn_bits + PAGE_SHIFT);
@@ -574,7 +583,6 @@  static void pv_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
 {
     struct vcpu *curr = current;
     struct domain *currd = curr->domain;
-    const struct cpuid_policy *p = currd->arch.cpuid;
 
     if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
         domain_cpuid(currd, leaf, subleaf, res);
@@ -583,147 +591,6 @@  static void pv_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
 
     switch ( leaf )
     {
-    case 0x00000001:
-        res->c = p->basic._1c;
-        res->d = p->basic._1d;
-
-        if ( !is_pvh_domain(currd) )
-        {
-            const struct cpu_user_regs *regs = guest_cpu_user_regs();
-
-            /*
-             * Delete the PVH condition when HVMLite formally replaces PVH,
-             * and HVM guests no longer enter a PV codepath.
-             */
-
-            /*
-             * !!! OSXSAVE handling for PV guests is non-architectural !!!
-             *
-             * Architecturally, the correct code here is simply:
-             *
-             *   if ( curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE )
-             *       c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
-             *
-             * However because of bugs in Xen (before c/s bd19080b, Nov 2010,
-             * the XSAVE cpuid flag leaked into guests despite the feature not
-             * being available for use), buggy workarounds where introduced to
-             * Linux (c/s 947ccf9c, also Nov 2010) which relied on the fact
-             * that Xen also incorrectly leaked OSXSAVE into the guest.
-             *
-             * Furthermore, providing architectural OSXSAVE behaviour to a
-             * many Linux PV guests triggered a further kernel bug when the
-             * fpu code observes that XSAVEOPT is available, assumes that
-             * xsave state had been set up for the task, and follows a wild
-             * pointer.
-             *
-             * Older Linux PVOPS kernels however do require architectural
-             * behaviour.  They observe Xen's leaked OSXSAVE and assume they
-             * can already use XSETBV, dying with a #UD because the shadowed
-             * CR4.OSXSAVE is clear.  This behaviour has been adjusted in all
-             * observed cases via stable backports of the above changeset.
-             *
-             * Therefore, the leaking of Xen's OSXSAVE setting has become a
-             * defacto part of the PV ABI and can't reasonably be corrected.
-             * It can however be restricted to only the enlightened CPUID
-             * view, as seen by the guest kernel.
-             *
-             * The following situations and logic now applies:
-             *
-             * - Hardware without CPUID faulting support and native CPUID:
-             *    There is nothing Xen can do here.  The hosts XSAVE flag will
-             *    leak through and Xen's OSXSAVE choice will leak through.
-             *
-             *    In the case that the guest kernel has not set up OSXSAVE, only
-             *    SSE will be set in xcr0, and guest userspace can't do too much
-             *    damage itself.
-             *
-             * - Enlightened CPUID or CPUID faulting available:
-             *    Xen can fully control what is seen here.  Guest kernels need
-             *    to see the leaked OSXSAVE via the enlightened path, but
-             *    guest userspace and the native is given architectural
-             *    behaviour.
-             *
-             *    Emulated vs Faulted CPUID is distinguised based on whether a
-             *    #UD or #GP is currently being serviced.
-             */
-            /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
-            if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ||
-                 (regs->entry_vector == TRAP_invalid_op &&
-                  guest_kernel_mode(curr, regs) &&
-                  (read_cr4() & X86_CR4_OSXSAVE)) )
-                res->c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
-
-            /*
-             * At the time of writing, a PV domain is the only viable option
-             * for Dom0.  Several interactions between dom0 and Xen for real
-             * hardware setup have unfortunately been implemented based on
-             * state which incorrectly leaked into dom0.
-             *
-             * These leaks are retained for backwards compatibility, but
-             * restricted to the hardware domains kernel only.
-             */
-            if ( is_hardware_domain(currd) && guest_kernel_mode(curr, regs) )
-            {
-                /*
-                 * MTRR used to unconditionally leak into PV guests.  They
-                 * cannot MTRR infrastructure at all, and shouldn't be able to
-                 * see the feature.
-                 *
-                 * Modern PVOPS Linux self-clobbers the MTRR feature, to avoid
-                 * trying to use the associated MSRs.  Xenolinux-based PV dom0's
-                 * however use the MTRR feature as an indication of the presence
-                 * of the XENPF_{add,del,read}_memtype hypercalls.
-                 */
-                if ( cpu_has_mtrr )
-                    res->d |= cpufeat_mask(X86_FEATURE_MTRR);
-
-                /*
-                 * MONITOR never leaked into PV guests, as PV guests cannot
-                 * use the MONITOR/MWAIT instructions.  As such, they require
-                 * the feature to not being present in emulated CPUID.
-                 *
-                 * Modern PVOPS Linux try to be cunning and use native CPUID
-                 * to see if the hardware actually supports MONITOR, and by
-                 * extension, deep C states.
-                 *
-                 * If the feature is seen, deep-C state information is
-                 * obtained from the DSDT and handed back to Xen via the
-                 * XENPF_set_processor_pminfo hypercall.
-                 *
-                 * This mechanism is incompatible with an HVM-based hardware
-                 * domain, and also with CPUID Faulting.
-                 *
-                 * Luckily, Xen can be just as 'cunning', and distinguish an
-                 * emulated CPUID from a faulted CPUID by whether a #UD or #GP
-                 * fault is currently being serviced.  Yuck...
-                 */
-                if ( cpu_has_monitor && regs->entry_vector == TRAP_gp_fault )
-                    res->c |= cpufeat_mask(X86_FEATURE_MONITOR);
-
-                /*
-                 * While MONITOR never leaked into PV guests, EIST always used
-                 * to.
-                 *
-                 * Modern PVOPS will only parse P state information from the
-                 * DSDT and return it to Xen if EIST is seen in the emulated
-                 * CPUID information.
-                 */
-                if ( cpu_has_eist )
-                    res->c |= cpufeat_mask(X86_FEATURE_EIST);
-            }
-        }
-
-        if ( vpmu_enabled(curr) &&
-             vpmu_is_set(vcpu_vpmu(curr), VPMU_CPU_HAS_DS) )
-        {
-            res->d |= cpufeat_mask(X86_FEATURE_DS);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
-                res->c |= cpufeat_mask(X86_FEATURE_DTES64);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
-                res->c |= cpufeat_mask(X86_FEATURE_DSCPL);
-        }
-        break;
-
     case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
              !vpmu_enabled(curr) )
@@ -740,8 +607,7 @@  static void pv_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
         *res = EMPTY_LEAF;
         break;
 
-    case 0x0:
-    case 0x2 ... 0x3:
+    case 0x0 ... 0x3:
     case 0x7 ... 0x9:
     case 0xc ... XSTATE_CPUID:
     case 0x80000000 ... 0xffffffff:
@@ -754,57 +620,11 @@  static void hvm_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
 {
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    const struct cpuid_policy *p = d->arch.cpuid;
 
     domain_cpuid(d, leaf, subleaf, res);
 
     switch ( leaf )
     {
-    case 0x1:
-        /* Fix up VLAPIC details. */
-        res->b &= 0x00FFFFFFu;
-        res->b |= (v->vcpu_id * 2) << 24;
-
-        res->c = p->basic._1c;
-        res->d = p->basic._1d;
-
-        /* APIC exposed to guests, but Fast-forward MSR_APIC_BASE.EN back in. */
-        if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
-            res->d &= ~cpufeat_bit(X86_FEATURE_APIC);
-
-        /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
-        if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE )
-            res->c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
-
-        /*
-         * PSE36 is not supported in shadow mode.  This bit should be
-         * unilaterally cleared.
-         *
-         * However, an unspecified version of Hyper-V from 2011 refuses
-         * to start as the "cpu does not provide required hw features" if
-         * it can't see PSE36.
-         *
-         * As a workaround, leak the toolstack-provided PSE36 value into a
-         * shadow guest if the guest is already using PAE paging (and won't
-         * care about reverting back to PSE paging).  Otherwise, knoble it, so
-         * a 32bit guest doesn't get the impression that it could try to use
-         * PSE36 paging.
-         */
-        if ( !hap_enabled(d) && !hvm_pae_enabled(v) )
-            res->d &= ~cpufeat_mask(X86_FEATURE_PSE36);
-
-        if ( vpmu_enabled(v) &&
-             vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
-        {
-            res->d |= cpufeat_mask(X86_FEATURE_DS);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
-                res->c |= cpufeat_mask(X86_FEATURE_DTES64);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
-                res->c |= cpufeat_mask(X86_FEATURE_DSCPL);
-        }
-
-        break;
-
     case 0xb:
         /* Fix the x2APIC identifier. */
         res->d = v->vcpu_id * 2;
@@ -822,8 +642,7 @@  static void hvm_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
             res->a = (res->a & ~0xff) | 3;
         break;
 
-    case 0x0:
-    case 0x2 ... 0x3:
+    case 0x0 ... 0x3:
     case 0x7 ... 0x9:
     case 0xc ... XSTATE_CPUID:
     case 0x80000000 ... 0xffffffff:
@@ -876,8 +695,7 @@  void guest_cpuid(const struct vcpu *v, uint32_t leaf,
         default:
             goto legacy;
 
-        case 0x0:
-        case 0x2 ... 0x3:
+        case 0x0 ... 0x3:
         case 0x8 ... 0x9:
         case 0xc:
             *res = p->basic.raw[leaf];
@@ -928,6 +746,141 @@  void guest_cpuid(const struct vcpu *v, uint32_t leaf,
      */
     switch ( leaf )
     {
+        const struct cpu_user_regs *regs;
+
+    case 0x1:
+        /* TODO: Rework topology logic. */
+        res->b &= 0x00ffffffu;
+        res->b |= (v->vcpu_id * 2) << 24;
+
+        /* TODO: Rework vPMU control in terms of toolstack choices. */
+        if ( vpmu_enabled(v) &&
+             vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
+        {
+            res->d |= cpufeat_mask(X86_FEATURE_DS);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
+                res->c |= cpufeat_mask(X86_FEATURE_DTES64);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+                res->c |= cpufeat_mask(X86_FEATURE_DSCPL);
+        }
+
+        if ( has_hvm_container_domain(d) )
+        {
+            /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
+            if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE )
+                res->c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+        }
+        else /* PV domain */
+        {
+            regs = guest_cpu_user_regs();
+
+            /*
+             * !!! OSXSAVE handling for PV guests is non-architectural !!!
+             *
+             * Architecturally, the correct code here is simply:
+             *
+             *   if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE )
+             *       c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+             *
+             * However because of bugs in Xen (before c/s bd19080b, Nov 2010,
+             * the XSAVE cpuid flag leaked into guests despite the feature not
+             * being available for use), buggy workarounds were introduced to
+             * Linux (c/s 947ccf9c, also Nov 2010) which relied on the fact
+             * that Xen also incorrectly leaked OSXSAVE into the guest.
+             *
+             * Furthermore, providing architectural OSXSAVE behaviour to
+             * many Linux PV guests triggered a further kernel bug when the
+             * fpu code observes that XSAVEOPT is available, assumes that
+             * xsave state had been set up for the task, and follows a wild
+             * pointer.
+             *
+             * Older Linux PVOPS kernels however do require architectural
+             * behaviour.  They observe Xen's leaked OSXSAVE and assume they
+             * can already use XSETBV, dying with a #UD because the shadowed
+             * CR4.OSXSAVE is clear.  This behaviour has been adjusted in all
+             * observed cases via stable backports of the above changeset.
+             *
+             * Therefore, the leaking of Xen's OSXSAVE setting has become a
+             * de facto part of the PV ABI and can't reasonably be corrected.
+             * It can however be restricted to only the enlightened CPUID
+             * view, as seen by the guest kernel.
+             *
+             * The following situations and logic now applies:
+             *
+             * - Hardware without CPUID faulting support and native CPUID:
+             *    There is nothing Xen can do here.  The host's XSAVE flag will
+             *    leak through and Xen's OSXSAVE choice will leak through.
+             *
+             *    In the case that the guest kernel has not set up OSXSAVE, only
+             *    SSE will be set in xcr0, and guest userspace can't do too much
+             *    damage itself.
+             *
+             * - Enlightened CPUID or CPUID faulting available:
+             *    Xen can fully control what is seen here.  Guest kernels need
+             *    to see the leaked OSXSAVE via the enlightened path, but
+             *    guest userspace and the native view are given architectural
+             *    behaviour.
+             *
+             *    Emulated vs Faulted CPUID is distinguished based on whether a
+             *    #UD or #GP is currently being serviced.
+             */
+            /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
+            if ( (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ||
+                 (regs->entry_vector == TRAP_invalid_op &&
+                  guest_kernel_mode(v, regs) &&
+                  (read_cr4() & X86_CR4_OSXSAVE)) )
+                res->c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+            /*
+             * At the time of writing, a PV domain is the only viable option
+             * for Dom0.  Several interactions between dom0 and Xen for real
+             * hardware setup have unfortunately been implemented based on
+             * state which incorrectly leaked into dom0.
+             *
+             * These leaks are retained for backwards compatibility, but
+             * restricted to the hardware domain's kernel only.
+             */
+            if ( is_hardware_domain(d) && guest_kernel_mode(v, regs) )
+            {
+                /*
+                 * MONITOR never leaked into PV guests, as PV guests cannot
+                 * use the MONITOR/MWAIT instructions.  As such, they require
+             * the feature not to be present in emulated CPUID.
+                 *
+                 * Modern PVOPS Linux try to be cunning and use native CPUID
+                 * to see if the hardware actually supports MONITOR, and by
+                 * extension, deep C states.
+                 *
+                 * If the feature is seen, deep-C state information is
+                 * obtained from the DSDT and handed back to Xen via the
+                 * XENPF_set_processor_pminfo hypercall.
+                 *
+                 * This mechanism is incompatible with an HVM-based hardware
+                 * domain, and also with CPUID Faulting.
+                 *
+                 * Luckily, Xen can be just as 'cunning', and distinguish an
+                 * emulated CPUID from a faulted CPUID by whether a #UD or #GP
+                 * fault is currently being serviced.  Yuck...
+                 */
+                if ( cpu_has_monitor && regs->entry_vector == TRAP_gp_fault )
+                    res->c |= cpufeat_mask(X86_FEATURE_MONITOR);
+
+                /*
+                 * While MONITOR never leaked into PV guests, EIST always used
+                 * to.
+                 *
+                 * Modern PVOPS Linux will only parse P state information from
+                 * the DSDT and return it to Xen if EIST is seen in the
+                 * emulated CPUID information.
+                 */
+                if ( cpu_has_eist )
+                    res->c |= cpufeat_mask(X86_FEATURE_EIST);
+            }
+        }
+
+        /* Adjustments common with leaf 0x80000001. */
+        goto common_dynamic_adjustments;
+
     case 0x7:
         switch ( subleaf )
         {
@@ -967,6 +920,12 @@  void guest_cpuid(const struct vcpu *v, uint32_t leaf,
         break;
 
     case 0x80000001:
+        /* SYSCALL is hidden outside of long mode on Intel. */
+        if ( p->x86_vendor == X86_VENDOR_INTEL &&
+             has_hvm_container_domain(d) && !hvm_long_mode_enabled(v) )
+            res->d &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
+
+    common_dynamic_adjustments: /* Adjustments common with leaf 1. */
         if ( has_hvm_container_domain(d) )
         {
             /* Fast-forward MSR_APIC_BASE.EN. */
@@ -989,10 +948,6 @@  void guest_cpuid(const struct vcpu *v, uint32_t leaf,
              */
             if ( !hap_enabled(d) && !hvm_pae_enabled(v) )
                 res->d &= ~cpufeat_mask(X86_FEATURE_PSE36);
-
-            /* SYSCALL is hidden outside of long mode on Intel. */
-            if ( p->x86_vendor == X86_VENDOR_INTEL && !hvm_long_mode_enabled(v) )
-                res->d &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
         }
         else /* PV domain */
         {
diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h
index bc3fc7c..6d1990b 100644
--- a/xen/include/asm-x86/cpuid.h
+++ b/xen/include/asm-x86/cpuid.h
@@ -106,7 +106,11 @@  struct cpuid_policy
             uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx;
 
             /* Leaf 0x1 - Family/model/stepping and features. */
-            uint32_t raw_fms, /* b */:32;
+            uint32_t raw_fms;
+            uint8_t :8,       /* Brand ID. */
+                clflush_size, /* Number of 8-byte blocks per cache line. */
+                lppp,         /* Logical processors per package. */
+                apic_id;      /* Initial APIC ID. */
             union {
                 uint32_t _1c;
                 struct { DECL_BITFIELD(1c); };