@@ -53,7 +53,8 @@ extern const struct hypervisor_x86 *x86_hyper;
/* Recognized hypervisors */
extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_pvhvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor(struct cpuinfo_x86 *c);
@@ -28,8 +28,11 @@
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
-#ifdef CONFIG_XEN
- &x86_hyper_xen,
+#ifdef CONFIG_XEN_PV
+ &x86_hyper_xen_pv,
+#endif
+#ifdef CONFIG_XEN_PVHVM
+ &x86_hyper_xen_pvhvm,
#endif
&x86_hyper_vmware,
&x86_hyper_ms_hyperv,
@@ -434,7 +434,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
-#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
/*
* On Xen PV, IOPL bits in pt_regs->flags have no effect, and
* current_pt_regs()->flags may not match the current task's
@@ -6,7 +6,6 @@ config XEN
bool "Xen guest support"
depends on PARAVIRT
select PARAVIRT_CLOCK
- select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_LOCAL_APIC && X86_TSC
@@ -15,18 +14,32 @@ config XEN
kernel to boot in a paravirtualized environment under the
Xen hypervisor.
+config XEN_PV
+ bool "Xen PV guest support"
+ default y
+ depends on XEN
+ help
+ Support running as a Xen PV guest.
+
config XEN_DOM0
- def_bool y
- depends on XEN && PCI_XEN && SWIOTLB_XEN
+ bool "Xen PV Dom0 support"
+ default y
+ depends on XEN_PV && PCI_XEN && SWIOTLB_XEN
depends on X86_IO_APIC && ACPI && PCI
+ select XEN_HAVE_PVMMU
+ help
+ Support running as a Xen PV Dom0 guest.
config XEN_PVHVM
- def_bool y
+ bool "Xen PVHVM guest support"
+ default y
depends on XEN && PCI && X86_LOCAL_APIC
+ help
+ Support running as a Xen PVHVM guest.
config XEN_512GB
bool "Limit Xen pv-domain memory to 512GB"
- depends on XEN && X86_64
+ depends on XEN_PV && X86_64
default y
help
Limit paravirtualized user domains to 512GB of RAM.
@@ -53,5 +66,5 @@ config XEN_DEBUG_FS
config XEN_PVH
bool "Support for running as a PVH guest"
- depends on X86_64 && XEN && XEN_PVHVM
+ depends on X86_64 && XEN_PV && XEN_PVHVM
def_bool n
@@ -10,11 +10,14 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_enlighten.o := $(nostackp)
CFLAGS_mmu.o := $(nostackp)
-obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
- time.o xen-asm.o xen-asm_$(BITS).o \
+obj-y := enlighten_common.o setup.o multicalls.o \
+ mmu.o irq.o time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o platform-pci-unplug.o \
p2m.o apic.o pmu.o
+obj-$(CONFIG_XEN_PV) += enlighten.o
+obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o
+
obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
@@ -90,78 +90,13 @@
#include "multicalls.h"
#include "pmu.h"
-EXPORT_SYMBOL_GPL(hypercall_page);
-
-/*
- * Pointer to the xen_vcpu_info structure or
- * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
- * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
- * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
- * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
- * acknowledge pending events.
- * Also more subtly it is used by the patched version of irq enable/disable
- * e.g. xen_irq_enable_direct and xen_iret in PV mode.
- *
- * The desire to be able to do those mask/unmask operations as a single
- * instruction by using the per-cpu offset held in %gs is the real reason
- * vcpu info is in a per-cpu pointer and the original reason for this
- * hypercall.
- *
- */
-DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
-/*
- * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
- * hypercall. This can be used both in PV and PVHVM mode. The structure
- * overrides the default per_cpu(xen_vcpu, cpu) value.
- */
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-
-/* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
-EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
-
-enum xen_domain_type xen_domain_type = XEN_NATIVE;
-EXPORT_SYMBOL_GPL(xen_domain_type);
-
-unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
-EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned long machine_to_phys_nr;
-EXPORT_SYMBOL(machine_to_phys_nr);
-
-struct start_info *xen_start_info;
-EXPORT_SYMBOL_GPL(xen_start_info);
-
-struct shared_info xen_dummy_shared_info;
void *xen_initial_gdt;
RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
-static int xen_cpu_up_prepare(unsigned int cpu);
-static int xen_cpu_up_online(unsigned int cpu);
-static int xen_cpu_dead(unsigned int cpu);
-
-/*
- * Point at some empty memory to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
-
-/*
- * Flag to determine whether vcpu info placement is available on all
- * VCPUs. We assume it is to start with, and then set it to zero on
- * the first failure. This is because it can succeed on some VCPUs
- * and not others, since it can involve hypervisor memory allocation,
- * or because the guest failed to guarantee all the appropriate
- * constraints on all VCPUs (ie buffer can't cross a page boundary).
- *
- * Note that any particular CPU may be using a placed vcpu structure,
- * but we can only optimise if the all are.
- *
- * 0: not available, 1: available
- */
-static int have_vcpu_info_placement = 1;
+static int xen_cpu_up_prepare_pv(unsigned int cpu);
+static int xen_cpu_dead_pv(unsigned int cpu);
struct tls_descs {
struct desc_struct desc[3];
@@ -176,73 +111,6 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
-static void clamp_max_cpus(void)
-{
-#ifdef CONFIG_SMP
- if (setup_max_cpus > MAX_VIRT_CPUS)
- setup_max_cpus = MAX_VIRT_CPUS;
-#endif
-}
-
-void xen_vcpu_setup(int cpu)
-{
- struct vcpu_register_vcpu_info info;
- int err;
- struct vcpu_info *vcpup;
-
- BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
- /*
- * This path is called twice on PVHVM - first during bootup via
- * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
- * hotplugged: cpu_up -> xen_hvm_cpu_notify.
- * As we can only do the VCPUOP_register_vcpu_info once lets
- * not over-write its result.
- *
- * For PV it is called during restore (xen_vcpu_restore) and bootup
- * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
- * use this function.
- */
- if (xen_hvm_domain()) {
- if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
- return;
- }
- if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
- per_cpu(xen_vcpu, cpu) =
- &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
-
- if (!have_vcpu_info_placement) {
- if (cpu >= MAX_VIRT_CPUS)
- clamp_max_cpus();
- return;
- }
-
- vcpup = &per_cpu(xen_vcpu_info, cpu);
- info.mfn = arbitrary_virt_to_mfn(vcpup);
- info.offset = offset_in_page(vcpup);
-
- /* Check to see if the hypervisor will put the vcpu_info
- structure where we want it, which allows direct access via
- a percpu-variable.
- N.B. This hypercall can _only_ be called once per CPU. Subsequent
- calls will error out with -EINVAL. This is due to the fact that
- hypervisor has no unregister variant and this hypercall does not
- allow to over-write info.mfn and info.offset.
- */
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
- &info);
-
- if (err) {
- printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
- have_vcpu_info_placement = 0;
- clamp_max_cpus();
- } else {
- /* This cpu is using the registered vcpu info, even if
- later ones fail to. */
- per_cpu(xen_vcpu, cpu) = vcpup;
- }
-}
-
/*
* On restore, set the vcpu placement up again.
* If it fails, then we're in a bad state, since
@@ -1291,18 +1159,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.end_context_switch = xen_end_context_switch,
};
-static void xen_reboot(int reason)
-{
- struct sched_shutdown r = { .reason = reason };
- int cpu;
-
- for_each_online_cpu(cpu)
- xen_pmu_finish(cpu);
-
- if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
- BUG();
-}
-
static void xen_restart(char *msg)
{
xen_reboot(SHUTDOWN_reboot);
@@ -1330,25 +1186,6 @@ static void xen_crash_shutdown(struct pt_regs *regs)
xen_reboot(SHUTDOWN_crash);
}
-static int
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- if (!kexec_crash_loaded())
- xen_reboot(SHUTDOWN_crash);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block xen_panic_block = {
- .notifier_call= xen_panic_event,
- .priority = INT_MIN
-};
-
-int xen_panic_handler_init(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
- return 0;
-}
-
static const struct machine_ops xen_machine_ops __initconst = {
.restart = xen_restart,
.halt = xen_machine_halt,
@@ -1537,24 +1374,6 @@ static void __init xen_dom0_set_legacy_features(void)
x86_platform.legacy.rtc = 1;
}
-static int xen_cpuhp_setup(void)
-{
- int rc;
-
- rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
- "XEN_HVM_GUEST_PREPARE",
- xen_cpu_up_prepare, xen_cpu_dead);
- if (rc >= 0) {
- rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "XEN_HVM_GUEST_ONLINE",
- xen_cpu_up_online, NULL);
- if (rc < 0)
- cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
- }
-
- return rc >= 0 ? 0 : rc;
-}
-
/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
@@ -1656,7 +1475,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
- WARN_ON(xen_cpuhp_setup());
+ WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
early_boot_irqs_disabled = true;
@@ -1771,95 +1590,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
}
-void __ref xen_hvm_init_shared_info(void)
-{
- int cpu;
- struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page = 0;
-
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- extend_brk(PAGE_SIZE, PAGE_SIZE);
- xatp.domid = DOMID_SELF;
- xatp.idx = 0;
- xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
- if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
- BUG();
-
- HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
- /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
- * page, we use it in the event channel upcall and in some pvclock
- * related functions. We don't need the vcpu_info placement
- * optimizations because we don't use any pv_mmu or pv_irq op on
- * HVM.
- * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
- * online but xen_hvm_init_shared_info is run at resume time too and
- * in that case multiple vcpus might be online. */
- for_each_online_cpu(cpu) {
- /* Leave it to be NULL. */
- if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
- continue;
- per_cpu(xen_vcpu, cpu) =
- &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
- }
-}
-
-#ifdef CONFIG_XEN_PVHVM
-static void __init init_hvm_pv_info(void)
-{
- int major, minor;
- uint32_t eax, ebx, ecx, edx, pages, msr, base;
- u64 pfn;
-
- base = xen_cpuid_base();
- cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
- major = eax >> 16;
- minor = eax & 0xffff;
- printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
-
- cpuid(base + 2, &pages, &msr, &ecx, &edx);
-
- pfn = __pa(hypercall_page);
- wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
- xen_setup_features();
-
- cpuid(base + 4, &eax, &ebx, &ecx, &edx);
- if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
- this_cpu_write(xen_vcpu_id, ebx);
- else
- this_cpu_write(xen_vcpu_id, smp_processor_id());
-
- pv_info.name = "Xen HVM";
-
- xen_domain_type = XEN_HVM_DOMAIN;
-}
-#endif
-
-static int xen_cpu_up_prepare(unsigned int cpu)
+static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
int rc;
- if (xen_hvm_domain()) {
- /*
- * This can happen if CPU was offlined earlier and
- * offlining timed out in common_cpu_die().
- */
- if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
- xen_smp_intr_free(cpu);
- xen_uninit_lock_cpu(cpu);
- }
-
- if (cpu_acpi_id(cpu) != U32_MAX)
- per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
- else
- per_cpu(xen_vcpu_id, cpu) = cpu;
- xen_vcpu_setup(cpu);
- }
-
if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
@@ -1872,7 +1606,7 @@ static int xen_cpu_up_prepare(unsigned int cpu)
return 0;
}
-static int xen_cpu_dead(unsigned int cpu)
+static int xen_cpu_dead_pv(unsigned int cpu)
{
xen_smp_intr_free(cpu);
@@ -1882,84 +1616,6 @@ static int xen_cpu_dead(unsigned int cpu)
return 0;
}
-static int xen_cpu_up_online(unsigned int cpu)
-{
- xen_init_lock_cpu(cpu);
- return 0;
-}
-
-#ifdef CONFIG_XEN_PVHVM
-#ifdef CONFIG_KEXEC_CORE
-static void xen_hvm_shutdown(void)
-{
- native_machine_shutdown();
- if (kexec_in_progress)
- xen_reboot(SHUTDOWN_soft_reset);
-}
-
-static void xen_hvm_crash_shutdown(struct pt_regs *regs)
-{
- native_machine_crash_shutdown(regs);
- xen_reboot(SHUTDOWN_soft_reset);
-}
-#endif
-
-static void __init xen_hvm_guest_init(void)
-{
- if (xen_pv_domain())
- return;
-
- init_hvm_pv_info();
-
- xen_hvm_init_shared_info();
-
- xen_panic_handler_init();
-
- BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
-
- xen_hvm_smp_init();
- WARN_ON(xen_cpuhp_setup());
- xen_unplug_emulated_devices();
- x86_init.irqs.intr_init = xen_init_IRQ;
- xen_hvm_init_time_ops();
- xen_hvm_init_mmu_ops();
-#ifdef CONFIG_KEXEC_CORE
- machine_ops.shutdown = xen_hvm_shutdown;
- machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
-#endif
-}
-#endif
-
-static bool xen_nopv = false;
-static __init int xen_parse_nopv(char *arg)
-{
- xen_nopv = true;
- return 0;
-}
-early_param("xen_nopv", xen_parse_nopv);
-
-static uint32_t __init xen_platform(void)
-{
- if (xen_nopv)
- return 0;
-
- return xen_cpuid_base();
-}
-
-bool xen_hvm_need_lapic(void)
-{
- if (xen_nopv)
- return false;
- if (xen_pv_domain())
- return false;
- if (!xen_hvm_domain())
- return false;
- if (xen_feature(XENFEAT_hvm_pirqs))
- return false;
- return true;
-}
-EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
-
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
if (xen_pv_domain()) {
@@ -2007,28 +1663,18 @@ static void xen_pin_vcpu(int cpu)
}
}
-const struct hypervisor_x86 x86_hyper_xen = {
- .name = "Xen",
- .detect = xen_platform,
-#ifdef CONFIG_XEN_PVHVM
- .init_platform = xen_hvm_guest_init,
-#endif
- .x2apic_available = xen_x2apic_para_available,
- .set_cpu_features = xen_set_cpu_features,
- .pin_vcpu = xen_pin_vcpu,
-};
-EXPORT_SYMBOL(x86_hyper_xen);
-
-#ifdef CONFIG_HOTPLUG_CPU
-void xen_arch_register_cpu(int num)
+uint32_t __init xen_platform_pv(void)
{
- arch_register_cpu(num);
-}
-EXPORT_SYMBOL(xen_arch_register_cpu);
+ if (xen_pv_domain())
+ return xen_cpuid_base();
-void xen_arch_unregister_cpu(int num)
-{
- arch_unregister_cpu(num);
+ return 0;
}
-EXPORT_SYMBOL(xen_arch_unregister_cpu);
-#endif
+
+const struct hypervisor_x86 x86_hyper_xen_pv = {
+ .name = "Xen PV",
+ .detect = xen_platform_pv,
+ .set_cpu_features = xen_set_cpu_features,
+ .pin_vcpu = xen_pin_vcpu,
+};
+EXPORT_SYMBOL(x86_hyper_xen_pv);
new file mode 100644
@@ -0,0 +1,216 @@
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+#include <linux/interrupt.h>
+
+#include <xen/features.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
+#include <asm/cpu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+#include "smp.h"
+
+EXPORT_SYMBOL_GPL(hypercall_page);
+
+/*
+ * Pointer to the xen_vcpu_info structure or
+ * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
+ * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
+ * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
+ * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
+ * acknowledge pending events.
+ * Also more subtly it is used by the patched version of irq enable/disable
+ * e.g. xen_irq_enable_direct and xen_iret in PV mode.
+ *
+ * The desire to be able to do those mask/unmask operations as a single
+ * instruction by using the per-cpu offset held in %gs is the real reason
+ * vcpu info is in a per-cpu pointer and the original reason for this
+ * hypercall.
+ *
+ */
+DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+/*
+ * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
+ * hypercall. This can be used both in PV and PVHVM mode. The structure
+ * overrides the default per_cpu(xen_vcpu, cpu) value.
+ */
+DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL_GPL(xen_domain_type);
+
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
+
+struct start_info *xen_start_info;
+EXPORT_SYMBOL_GPL(xen_start_info);
+
+struct shared_info xen_dummy_shared_info;
+
+/*
+ * Point at some empty memory to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
+
+/*
+ * Flag to determine whether vcpu info placement is available on all
+ * VCPUs. We assume it is to start with, and then set it to zero on
+ * the first failure. This is because it can succeed on some VCPUs
+ * and not others, since it can involve hypervisor memory allocation,
+ * or because the guest failed to guarantee all the appropriate
+ * constraints on all VCPUs (ie buffer can't cross a page boundary).
+ *
+ * Note that any particular CPU may be using a placed vcpu structure,
+ * but we can only optimise if the all are.
+ *
+ * 0: not available, 1: available
+ */
+int have_vcpu_info_placement = 1;
+
+static int xen_cpu_up_online(unsigned int cpu)
+{
+ xen_init_lock_cpu(cpu);
+ return 0;
+}
+
+int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
+ int (*cpu_dead_cb)(unsigned int))
+{
+ int rc;
+
+ rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
+ "XEN_HVM_GUEST_PREPARE",
+ cpu_up_prepare_cb, cpu_dead_cb);
+ if (rc >= 0) {
+ rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "XEN_HVM_GUEST_ONLINE",
+ xen_cpu_up_online, NULL);
+ if (rc < 0)
+ cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
+ }
+
+ return rc >= 0 ? 0 : rc;
+}
+
+static void clamp_max_cpus(void)
+{
+#ifdef CONFIG_SMP
+ if (setup_max_cpus > MAX_VIRT_CPUS)
+ setup_max_cpus = MAX_VIRT_CPUS;
+#endif
+}
+
+void xen_vcpu_setup(int cpu)
+{
+ struct vcpu_register_vcpu_info info;
+ int err;
+ struct vcpu_info *vcpup;
+
+ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+ /*
+ * This path is called twice on PVHVM - first during bootup via
+ * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
+ * hotplugged: cpu_up -> xen_hvm_cpu_notify.
+ * As we can only do the VCPUOP_register_vcpu_info once lets
+ * not over-write its result.
+ *
+ * For PV it is called during restore (xen_vcpu_restore) and bootup
+ * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
+ * use this function.
+ */
+ if (xen_hvm_domain()) {
+ if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+ return;
+ }
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+
+ if (!have_vcpu_info_placement) {
+ if (cpu >= MAX_VIRT_CPUS)
+ clamp_max_cpus();
+ return;
+ }
+
+ vcpup = &per_cpu(xen_vcpu_info, cpu);
+ info.mfn = arbitrary_virt_to_mfn(vcpup);
+ info.offset = offset_in_page(vcpup);
+
+ /* Check to see if the hypervisor will put the vcpu_info
+ structure where we want it, which allows direct access via
+ a percpu-variable.
+ N.B. This hypercall can _only_ be called once per CPU. Subsequent
+ calls will error out with -EINVAL. This is due to the fact that
+ hypervisor has no unregister variant and this hypercall does not
+ allow to over-write info.mfn and info.offset.
+ */
+ err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+ &info);
+
+ if (err) {
+ printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
+ have_vcpu_info_placement = 0;
+ clamp_max_cpus();
+ } else {
+ /* This cpu is using the registered vcpu info, even if
+ later ones fail to. */
+ per_cpu(xen_vcpu, cpu) = vcpup;
+ }
+}
+
+void xen_reboot(int reason)
+{
+ struct sched_shutdown r = { .reason = reason };
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ xen_pmu_finish(cpu);
+
+ if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
+ BUG();
+}
+
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ if (!kexec_crash_loaded())
+ xen_reboot(SHUTDOWN_crash);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block xen_panic_block = {
+ .notifier_call = xen_panic_event,
+ .priority = INT_MIN
+};
+
+int xen_panic_handler_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num)
+{
+ arch_register_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_register_cpu);
+
+void xen_arch_unregister_cpu(int num)
+{
+ arch_unregister_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_unregister_cpu);
+#endif
new file mode 100644
@@ -0,0 +1,202 @@
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+
+#include <xen/features.h>
+#include <xen/events.h>
+#include <xen/interface/memory.h>
+
+#include <asm/reboot.h>
+#include <asm/setup.h>
+#include <asm/hypervisor.h>
+
+#include <asm/xen/cpuid.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+#include "mmu.h"
+#include "smp.h"
+
+void __ref xen_hvm_init_shared_info(void)
+{
+ int cpu;
+ struct xen_add_to_physmap xatp;
+ static struct shared_info *shared_info_page;
+
+ if (!shared_info_page)
+ shared_info_page = (struct shared_info *)
+ extend_brk(PAGE_SIZE, PAGE_SIZE);
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+
+ HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+
+ /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+ * page, we use it in the event channel upcall and in some pvclock
+ * related functions. We don't need the vcpu_info placement
+ * optimizations because we don't use any pv_mmu or pv_irq op on
+ * HVM.
+ * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+ * online but xen_hvm_init_shared_info is run at resume time too and
+ * in that case multiple vcpus might be online. */
+ for_each_online_cpu(cpu) {
+ /* Leave it to be NULL. */
+ if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
+ continue;
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+ }
+}
+
+static void __init init_hvm_pv_info(void)
+{
+ int major, minor;
+ uint32_t eax, ebx, ecx, edx, pages, msr, base;
+ u64 pfn;
+
+ base = xen_cpuid_base();
+ cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+ major = eax >> 16;
+ minor = eax & 0xffff;
+ printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
+
+ cpuid(base + 2, &pages, &msr, &ecx, &edx);
+
+ pfn = __pa(hypercall_page);
+ wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+ xen_setup_features();
+
+ cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+ if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+ this_cpu_write(xen_vcpu_id, ebx);
+ else
+ this_cpu_write(xen_vcpu_id, smp_processor_id());
+
+ pv_info.name = "Xen HVM";
+
+ xen_domain_type = XEN_HVM_DOMAIN;
+}
+
+#ifdef CONFIG_KEXEC_CORE
+static void xen_hvm_shutdown(void)
+{
+ native_machine_shutdown();
+ if (kexec_in_progress)
+ xen_reboot(SHUTDOWN_soft_reset);
+}
+
+static void xen_hvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_machine_crash_shutdown(regs);
+ xen_reboot(SHUTDOWN_soft_reset);
+}
+#endif
+
+static int xen_cpu_up_prepare_hvm(unsigned int cpu)
+{
+ int rc;
+
+ /*
+ * This can happen if CPU was offlined earlier and
+ * offlining timed out in common_cpu_die().
+ */
+ if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
+ xen_smp_intr_free(cpu);
+ xen_uninit_lock_cpu(cpu);
+ }
+
+ if (cpu_acpi_id(cpu) != U32_MAX)
+ per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+ else
+ per_cpu(xen_vcpu_id, cpu) = cpu;
+ xen_vcpu_setup(cpu);
+
+ if (xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_timer(cpu);
+
+ rc = xen_smp_intr_init(cpu);
+ if (rc) {
+ WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
+ cpu, rc);
+ return rc;
+ }
+ return 0;
+}
+
+static int xen_cpu_dead_hvm(unsigned int cpu)
+{
+ xen_smp_intr_free(cpu);
+
+ if (xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_teardown_timer(cpu);
+
+ return 0;
+}
+
+void __init xen_hvm_guest_init(void)
+{
+ if (xen_pv_domain())
+ return;
+
+ init_hvm_pv_info();
+
+ xen_hvm_init_shared_info();
+
+ xen_panic_handler_init();
+
+ BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
+
+ xen_hvm_smp_init();
+ WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
+ xen_unplug_emulated_devices();
+ x86_init.irqs.intr_init = xen_init_IRQ;
+ xen_hvm_init_time_ops();
+ xen_hvm_init_mmu_ops();
+#ifdef CONFIG_KEXEC_CORE
+ machine_ops.shutdown = xen_hvm_shutdown;
+ machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
+#endif
+}
+
+bool xen_hvm_need_lapic(void)
+{
+ if (xen_nopv)
+ return false;
+ if (xen_pv_domain())
+ return false;
+ if (!xen_hvm_domain())
+ return false;
+ if (xen_feature(XENFEAT_hvm_pirqs))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
+
+bool xen_nopv;
+static __init int xen_parse_nopv(char *arg)
+{
+ xen_nopv = true;
+ return 0;
+}
+early_param("xen_nopv", xen_parse_nopv);
+
+uint32_t __init xen_platform_hvm(void)
+{
+ if (xen_pv_domain() || xen_nopv)
+ return 0;
+
+ return xen_cpuid_base();
+}
+
+const struct hypervisor_x86 x86_hyper_xen_pvhvm = {
+ .name = "Xen",
+ .detect = xen_platform_hvm,
+ .init_platform = xen_hvm_guest_init,
+ .x2apic_available = xen_x2apic_para_available,
+};
+EXPORT_SYMBOL(x86_hyper_xen_pvhvm);
@@ -1295,7 +1295,9 @@ static void __init xen_pagetable_init(void)
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_remap_memory();
+#ifdef CONFIG_XEN_PV
xen_setup_shared_info();
+#endif
}
static void xen_write_cr2(unsigned long cr2)
{
@@ -253,6 +253,7 @@ int xen_smp_intr_init(unsigned int cpu)
return rc;
}
+#ifdef CONFIG_XEN_PV
static void __init xen_fill_possible_map(void)
{
int i, rc;
@@ -304,6 +305,7 @@ static void __init xen_filter_cpu_maps(void)
#endif
}
+#endif
static void __init xen_smp_prepare_boot_cpu(void)
{
@@ -311,6 +313,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
native_smp_prepare_boot_cpu();
if (xen_pv_domain()) {
+#ifdef CONFIG_XEN_PV
if (!xen_feature(XENFEAT_writable_page_tables))
/* We've switched to the "real" per-cpu gdt, so make
* sure the old memory can be recycled. */
@@ -327,6 +330,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
+#endif
}
/*
@@ -344,6 +348,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
xen_init_spinlocks();
}
+#ifdef CONFIG_XEN_PV
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
@@ -525,22 +530,6 @@ static int xen_cpu_disable(void)
return 0;
}
-static void xen_cpu_die(unsigned int cpu)
-{
- while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
- xen_vcpu_nr(cpu), NULL)) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ/10);
- }
-
- if (common_cpu_die(cpu) == 0) {
- xen_smp_intr_free(cpu);
- xen_uninit_lock_cpu(cpu);
- xen_teardown_timer(cpu);
- xen_pmu_finish(cpu);
- }
-}
-
static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
@@ -592,6 +581,23 @@ static void xen_stop_other_cpus(int wait)
{
smp_call_function(stop_self, NULL, wait);
}
+#endif /* CONFIG_XEN_PV */
+
+static void xen_cpu_die(unsigned int cpu)
+{
+ while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
+ xen_vcpu_nr(cpu), NULL)) {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ/10);
+ }
+
+ if (common_cpu_die(cpu) == 0) {
+ xen_smp_intr_free(cpu);
+ xen_uninit_lock_cpu(cpu);
+ xen_teardown_timer(cpu);
+ xen_pmu_finish(cpu);
+ }
+}
static void xen_smp_send_reschedule(int cpu)
{
@@ -738,6 +744,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+#ifdef CONFIG_XEN_PV
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
@@ -760,6 +767,7 @@ void __init xen_smp_init(void)
smp_ops = xen_smp_ops;
xen_fill_possible_map();
}
+#endif
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
@@ -16,6 +16,7 @@
static void xen_pv_pre_suspend(void)
{
+#ifdef CONFIG_XEN_PV
xen_mm_pin_all();
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
@@ -28,6 +29,7 @@ static void xen_pv_pre_suspend(void)
if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
__pte_ma(0), 0))
BUG();
+#endif
}
static void xen_hvm_post_suspend(int suspend_cancelled)
@@ -48,6 +50,7 @@ static void xen_hvm_post_suspend(int suspend_cancelled)
static void xen_pv_post_suspend(int suspend_cancelled)
{
+#ifdef CONFIG_XEN_PV
xen_build_mfn_list_list();
xen_setup_shared_info();
@@ -66,6 +69,7 @@ static void xen_pv_post_suspend(int suspend_cancelled)
}
xen_mm_unpin_all();
+#endif
}
void xen_arch_pre_suspend(void)
@@ -35,6 +35,7 @@
#define PVH_FEATURES (0)
#endif
+#ifdef CONFIG_XEN_PV
__INIT
ENTRY(startup_xen)
cld
@@ -53,6 +54,7 @@ ENTRY(startup_xen)
jmp xen_start_kernel
__FINIT
+#endif
#ifdef CONFIG_XEN_PVH
/*
@@ -112,7 +114,9 @@ ENTRY(hypercall_page)
/* Map the p2m table to a 512GB-aligned user address. */
ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE)
#endif
+#ifdef CONFIG_XEN_PV
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
+#endif
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
@@ -76,6 +76,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu);
+extern int have_vcpu_info_placement;
+
void xen_vcpu_setup(int cpu);
void xen_setup_vcpu_info_placement(void);
@@ -15,6 +15,8 @@ static inline uint32_t xen_vcpu_nr(int cpu)
return per_cpu(xen_vcpu_id, cpu);
}
+extern bool xen_nopv;
+
void xen_arch_pre_suspend(void);
void xen_arch_post_suspend(int suspend_cancelled);
@@ -33,6 +35,11 @@ u64 xen_steal_clock(int cpu);
int xen_setup_shutdown_event(void);
+int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
+ int (*cpu_dead_cb)(unsigned int));
+
+void xen_reboot(int reason);
+
extern unsigned long *xen_contiguous_bitmap;
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
Introduce CONFIG_XEN_PV config option and split enlighten.c into 3 files. Temporary add #ifdef CONFIG_XEN_PV to smp.c and mmu.c to not break the build and not make the patch even bigger. xen_cpu_up_prepare*/xen_cpu_die hooks require separation to support future xen_smp_intr_init() split. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> --- arch/x86/include/asm/hypervisor.h | 3 +- arch/x86/kernel/cpu/hypervisor.c | 7 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/xen/Kconfig | 25 ++- arch/x86/xen/Makefile | 7 +- arch/x86/xen/enlighten.c | 388 ++------------------------------------ arch/x86/xen/enlighten_common.c | 216 +++++++++++++++++++++ arch/x86/xen/enlighten_hvm.c | 202 ++++++++++++++++++++ arch/x86/xen/mmu.c | 2 + arch/x86/xen/smp.c | 40 ++-- arch/x86/xen/suspend.c | 4 + arch/x86/xen/xen-head.S | 4 + arch/x86/xen/xen-ops.h | 2 + include/xen/xen-ops.h | 7 + 14 files changed, 510 insertions(+), 399 deletions(-) create mode 100644 arch/x86/xen/enlighten_common.c create mode 100644 arch/x86/xen/enlighten_hvm.c