[RFC,07/16] x86/xen: make vcpu_info part of xenhost_t

Message ID	20190509172540.12398-8-ankur.a.arora@oracle.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <xen-devel-bounces@lists.xenproject.org> From: Ankur Arora <ankur.a.arora@oracle.com> To: linux-kernel@vger.kernel.org, xen-devel@lists.xenproject.org Date: Thu, 9 May 2019 10:25:31 -0700 Message-Id: <20190509172540.12398-8-ankur.a.arora@oracle.com> In-Reply-To: <20190509172540.12398-1-ankur.a.arora@oracle.com> References: <20190509172540.12398-1-ankur.a.arora@oracle.com> MIME-Version: 1.0 Subject: [Xen-devel] [RFC PATCH 07/16] x86/xen: make vcpu_info part of xenhost_t Precedence: list Cc: jgross@suse.com, sstabellini@kernel.org, konrad.wilk@oracle.com, ankur.a.arora@oracle.com, pbonzini@redhat.com, boris.ostrovsky@oracle.com, joao.m.martins@oracle.com Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" <xen-devel-bounces@lists.xenproject.org>
Series	xenhost support \| expand [RFC,00/16] xenhost support [RFC,01/16] x86/xen: add xenhost_t interface [RFC,02/16] x86/xen: cpuid support in xenhost_t [RFC,03/16] x86/xen: make hypercall_page generic [RFC,04/16] x86/xen: hypercall support for xenhost_t [RFC,05/16] x86/xen: add feature support in xenhost_t [RFC,06/16] x86/xen: add shared_info support to xenhost_t [RFC,07/16] x86/xen: make vcpu_info part of xenhost_t [RFC,08/16] x86/xen: irq/upcall handling with multiple xenhosts [RFC,09/16] xen/evtchn: support evtchn in xenhost_t [RFC,10/16] xen/balloon: support ballooning in xenhost_t [RFC,11/16] xen/grant-table: make grant-table xenhost aware [RFC,12/16] xen/xenbus: support xenbus frontend/backend with xenhost_t [RFC,13/16] drivers/xen: gnttab, evtchn, xenbus API changes [RFC,14/16] xen/blk: gnttab, evtchn, xenbus API changes [RFC,15/16] xen/net: gnttab, evtchn, xenbus API changes [RFC,16/16] xen/grant-table: host_addr fixup in mapping on xenhost_r0

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 20e0de844442..0dafbbc838ef 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -20,35 +20,6 @@ #include "smp.h" #include "pmu.h" -/* - * Pointer to the xen_vcpu_info structure or - * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info - * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info - * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point - * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to - * acknowledge pending events. - * Also more subtly it is used by the patched version of irq enable/disable - * e.g. xen_irq_enable_direct and xen_iret in PV mode. - * - * The desire to be able to do those mask/unmask operations as a single - * instruction by using the per-cpu offset held in %gs is the real reason - * vcpu info is in a per-cpu pointer and the original reason for this - * hypercall. - * - */ -DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); - -/* - * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info - * hypercall. This can be used both in PV and PVHVM mode. The structure - * overrides the default per_cpu(xen_vcpu, cpu) value. - */ -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); - -/* Linux <-> Xen vCPU id mapping */ -DEFINE_PER_CPU(uint32_t, xen_vcpu_id); -EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); - enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); @@ -112,12 +83,12 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } -static int xen_vcpu_setup_restore(int cpu) +static int xen_vcpu_setup_restore(xenhost_t *xh, int cpu) { int rc = 0; /* Any per_cpu(xen_vcpu) is stale, so reset it */ - xen_vcpu_info_reset(cpu); + xen_vcpu_info_reset(xh, cpu); /* * For PVH and PVHVM, setup online VCPUs only. The rest will @@ -125,7 +96,7 @@ static int xen_vcpu_setup_restore(int cpu) */ if (xen_pv_domain() || (xen_hvm_domain() && cpu_online(cpu))) { - rc = xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(xh, cpu); } return rc; @@ -138,30 +109,42 @@ static int xen_vcpu_setup_restore(int cpu) */ void xen_vcpu_restore(void) { - int cpu, rc; + int cpu, rc = 0; + /* + * VCPU management is primarily the responsibility of xh_default and + * xh_remote only needs VCPUOP_register_vcpu_info. + * So, we do VPUOP_down and VCPUOP_up only on xh_default. + * + * (Currently, however, VCPUOP_register_vcpu_info is allowed only + * on VCPUs that are self or down, so we might need a new model + * there.) + */ for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); bool is_up; + xenhost_t **xh; - if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID) + if (xen_vcpu_nr(xh_default, cpu) == XEN_VCPU_ID_INVALID) continue; /* Only Xen 4.5 and higher support this. */ is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, - xen_vcpu_nr(cpu), NULL) > 0; + xen_vcpu_nr(xh_default, cpu), NULL) > 0; if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(xh_default, cpu), NULL)) BUG(); if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_runstate_info(cpu); - rc = xen_vcpu_setup_restore(cpu); - if (rc) - pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " - "System will hang.\n", cpu, rc); + for_each_xenhost(xh) { + rc = xen_vcpu_setup_restore(*xh, cpu); + if (rc) + pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " + "System will hang.\n", cpu, rc); + } /* * In case xen_vcpu_setup_restore() fails, do not bring up the * VCPU. This helps us avoid the resulting OOPS when the VCPU @@ -172,29 +155,29 @@ void xen_vcpu_restore(void) * VCPUs to come up. */ if (other_cpu && is_up && (rc == 0) && - HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(xh_default, cpu), NULL)) BUG(); } } -void xen_vcpu_info_reset(int cpu) +void xen_vcpu_info_reset(xenhost_t *xh, int cpu) { - if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { - per_cpu(xen_vcpu, cpu) = - &xh_default->HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + if (xen_vcpu_nr(xh, cpu) < MAX_VIRT_CPUS) { + xh->xen_vcpu[cpu] = + &xh->HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(xh, cpu)]; } else { /* Set to NULL so that if somebody accesses it we get an OOPS */ - per_cpu(xen_vcpu, cpu) = NULL; + xh->xen_vcpu[cpu] = NULL; } } -int xen_vcpu_setup(int cpu) +int xen_vcpu_setup(xenhost_t *xh, int cpu) { struct vcpu_register_vcpu_info info; int err; struct vcpu_info *vcpup; - BUG_ON(xh_default->HYPERVISOR_shared_info == &xen_dummy_shared_info); + BUG_ON(xh->HYPERVISOR_shared_info == &xen_dummy_shared_info); /* * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu) @@ -208,12 +191,12 @@ int xen_vcpu_setup(int cpu) * use this function. */ if (xen_hvm_domain()) { - if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) + if (xh->xen_vcpu[cpu] == &xh->xen_vcpu_info[cpu]) return 0; } if (xen_have_vcpu_info_placement) { - vcpup = &per_cpu(xen_vcpu_info, cpu); + vcpup = &xh->xen_vcpu_info[cpu]; info.mfn = arbitrary_virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); @@ -227,8 +210,8 @@ int xen_vcpu_setup(int cpu) * hypercall does not allow to over-write info.mfn and * info.offset. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, - xen_vcpu_nr(cpu), &info); + err = hypervisor_vcpu_op(xh, VCPUOP_register_vcpu_info, + xen_vcpu_nr(xh, cpu), &info); if (err) { pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n", @@ -239,14 +222,14 @@ int xen_vcpu_setup(int cpu) * This cpu is using the registered vcpu info, even if * later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; + xh->xen_vcpu[cpu] = vcpup; } } if (!xen_have_vcpu_info_placement) - xen_vcpu_info_reset(cpu); + xen_vcpu_info_reset(xh, cpu); - return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); + return ((xh->xen_vcpu[cpu] == NULL) ? -ENODEV : 0); } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 0e53363f9d1f..c1981a3e4989 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -5,6 +5,7 @@ #include <linux/kexec.h> #include <linux/memblock.h> +#include <xen/interface/xen.h> #include <xen/xenhost.h> #include <xen/features.h> #include <xen/events.h> @@ -72,22 +73,22 @@ static void __init xen_hvm_init_mem_mapping(void) { xenhost_t **xh; - for_each_xenhost(xh) + for_each_xenhost(xh) { xenhost_reset_shared_info(*xh); - /* - * The virtual address of the shared_info page has changed, so - * the vcpu_info pointer for VCPU 0 is now stale. - * - * The prepare_boot_cpu callback will re-initialize it via - * xen_vcpu_setup, but we can't rely on that to be called for - * old Xen versions (xen_have_vector_callback == 0). - * - * It is, in any case, bad to have a stale vcpu_info pointer - * so reset it now. - * For now, this uses xh_default implictly. - */ - xen_vcpu_info_reset(0); + /* + * The virtual address of the shared_info page has changed, so + * the vcpu_info pointer for VCPU 0 is now stale. + * + * The prepare_boot_cpu callback will re-initialize it via + * xen_vcpu_setup, but we can't rely on that to be called for + * old Xen versions (xen_have_vector_callback == 0). + * + * It is, in any case, bad to have a stale vcpu_info pointer + * so reset it now. + */ + xen_vcpu_info_reset(*xh, 0); + } } extern uint32_t xen_pv_cpuid_base(xenhost_t *xh); @@ -103,11 +104,32 @@ void xen_hvm_setup_hypercall_page(xenhost_t *xh) xh->hypercall_page = xen_hypercall_page; } +static void xen_hvm_probe_vcpu_id(xenhost_t *xh, int cpu) +{ + uint32_t eax, ebx, ecx, edx, base; + + base = xenhost_cpuid_base(xh); + + if (cpu == 0) { + cpuid(base + 4, &eax, &ebx, &ecx, &edx); + if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT) + xh->xen_vcpu_id[cpu] = ebx; + else + xh->xen_vcpu_id[cpu] = smp_processor_id(); + } else { + if (cpu_acpi_id(cpu) != U32_MAX) + xh->xen_vcpu_id[cpu] = cpu_acpi_id(cpu); + else + xh->xen_vcpu_id[cpu] = cpu; + } +} + xenhost_ops_t xh_hvm_ops = { .cpuid_base = xen_pv_cpuid_base, .setup_hypercall_page = xen_hvm_setup_hypercall_page, .setup_shared_info = xen_hvm_init_shared_info, .reset_shared_info = xen_hvm_reset_shared_info, + .probe_vcpu_id = xen_hvm_probe_vcpu_id, }; xenhost_ops_t xh_hvm_nested_ops = { @@ -116,7 +138,7 @@ xenhost_ops_t xh_hvm_nested_ops = { static void __init init_hvm_pv_info(void) { int major, minor; - uint32_t eax, ebx, ecx, edx, base; + uint32_t eax, base; xenhost_t **xh; base = xenhost_cpuid_base(xh_default); @@ -147,11 +169,8 @@ static void __init init_hvm_pv_info(void) if (xen_validate_features() == false) __xenhost_unregister(xenhost_r2); - cpuid(base + 4, &eax, &ebx, &ecx, &edx); - if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT) - this_cpu_write(xen_vcpu_id, ebx); - else - this_cpu_write(xen_vcpu_id, smp_processor_id()); + for_each_xenhost(xh) + xenhost_probe_vcpu_id(*xh, smp_processor_id()); } #ifdef CONFIG_KEXEC_CORE @@ -172,6 +191,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs) static int xen_cpu_up_prepare_hvm(unsigned int cpu) { int rc = 0; + xenhost_t **xh; /* * This can happen if CPU was offlined earlier and @@ -182,13 +202,12 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) xen_uninit_lock_cpu(cpu); } - if (cpu_acpi_id(cpu) != U32_MAX) - per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); - else - per_cpu(xen_vcpu_id, cpu) = cpu; - rc = xen_vcpu_setup(cpu); - if (rc) - return rc; + for_each_xenhost(xh) { + xenhost_probe_vcpu_id(*xh, cpu); + rc = xen_vcpu_setup(*xh, cpu); + if (rc) + return rc; + } if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); @@ -229,15 +248,15 @@ static void __init xen_hvm_guest_init(void) for_each_xenhost(xh) { reserve_shared_info(*xh); xenhost_setup_shared_info(*xh); + + /* + * xen_vcpu is a pointer to the vcpu_info struct in the + * shared_info page, we use it in the event channel upcall + * and in some pvclock related functions. + */ + xen_vcpu_info_reset(*xh, 0); } - /* - * xen_vcpu is a pointer to the vcpu_info struct in the shared_info - * page, we use it in the event channel upcall and in some pvclock - * related functions. - * For now, this uses xh_default implictly. - */ - xen_vcpu_info_reset(0); xen_panic_handler_init(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 1a9eded4b76b..5f6a1475ec0c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -36,8 +36,8 @@ #include <xen/xen.h> #include <xen/events.h> -#include <xen/xenhost.h> #include <xen/interface/xen.h> +#include <xen/xenhost.h> #include <xen/interface/version.h> #include <xen/interface/physdev.h> #include <xen/interface/vcpu.h> @@ -126,12 +126,12 @@ static void __init xen_pv_init_platform(void) populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); - for_each_xenhost(xh) + for_each_xenhost(xh) { xenhost_setup_shared_info(*xh); - /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */ - /* For now this uses xh_default implicitly. */ - xen_vcpu_info_reset(0); + /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */ + xen_vcpu_info_reset(*xh, 0); + } /* pvclock is in shared info area */ xen_init_time_ops(); @@ -973,28 +973,31 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) /* This is called once we have the cpu_possible_mask */ void __init xen_setup_vcpu_info_placement(void) { + xenhost_t **xh; int cpu; for_each_possible_cpu(cpu) { - /* Set up direct vCPU id mapping for PV guests. */ - per_cpu(xen_vcpu_id, cpu) = cpu; + for_each_xenhost(xh) { + xenhost_probe_vcpu_id(*xh, cpu); - /* - * xen_vcpu_setup(cpu) can fail -- in which case it - * falls back to the shared_info version for cpus - * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. - * - * xen_cpu_up_prepare_pv() handles the rest by failing - * them in hotplug. - */ - (void) xen_vcpu_setup(cpu); + /* + * xen_vcpu_setup(cpu) can fail -- in which case it + * falls back to the shared_info version for cpus + * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. + * + * xen_cpu_up_prepare_pv() handles the rest by failing + * them in hotplug. + */ + (void) xen_vcpu_setup(*xh, cpu); + } } /* * xen_vcpu_setup managed to place the vcpu_info within the * percpu area for all cpus, so make use of it. */ - if (xen_have_vcpu_info_placement) { + if (xen_have_vcpu_info_placement && false) { + /* Disable direct access until we have proper pcpu data structures. */ pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); pv_ops.irq.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); @@ -1110,6 +1113,11 @@ static unsigned char xen_get_nmi_reason(void) { unsigned char reason = 0; + /* + * We could get this information from all the xenhosts and OR it. + * But, the remote xenhost isn't really expected to send us NMIs. + */ + /* Construct a value which looks like it came from port 0x61. */ if (test_bit(_XEN_NMIREASON_io_error, &xh_default->HYPERVISOR_shared_info->arch.nmi_reason)) @@ -1222,6 +1230,12 @@ static void xen_pv_reset_shared_info(xenhost_t *xh) BUG(); } +void xen_pv_probe_vcpu_id(xenhost_t *xh, int cpu) +{ + /* Set up direct vCPU id mapping for PV guests. */ + xh->xen_vcpu_id[cpu] = cpu; +} + xenhost_ops_t xh_pv_ops = { .cpuid_base = xen_pv_cpuid_base, @@ -1229,6 +1243,8 @@ xenhost_ops_t xh_pv_ops = { .setup_shared_info = xen_pv_setup_shared_info, .reset_shared_info = xen_pv_reset_shared_info, + + .probe_vcpu_id = xen_pv_probe_vcpu_id, }; xenhost_ops_t xh_pv_nested_ops = { @@ -1283,7 +1299,9 @@ asmlinkage __visible void __init xen_start_kernel(void) * Don't do the full vcpu_info placement stuff until we have * the cpu_possible_mask and a non-dummy shared_info. */ - xen_vcpu_info_reset(0); + for_each_xenhost(xh) { + xen_vcpu_info_reset(*xh, 0); + } x86_platform.get_nmi_reason = xen_get_nmi_reason; @@ -1328,7 +1346,9 @@ asmlinkage __visible void __init xen_start_kernel(void) get_cpu_address_sizes(&boot_cpu_data); /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; + /* Note: we should be doing this before xen_vcpu_info_reset above. */ + for_each_xenhost(xh) + xenhost_probe_vcpu_id(*xh, 0); idt_setup_early_handler(); @@ -1485,7 +1505,7 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu) { int rc; - if (per_cpu(xen_vcpu, cpu) == NULL) + if (xh_default->xen_vcpu[cpu] == NULL) return -ENODEV; xen_setup_timer(cpu); diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 50277dfbdf30..3f98526dd041 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -2,13 +2,14 @@ #include <linux/acpi.h> #include <xen/hvc-console.h> +#include <xen/interface/xen.h> #include <asm/io_apic.h> #include <asm/hypervisor.h> #include <asm/e820/api.h> -#include <xen/xen.h> #include <xen/xenhost.h> +#include <xen/xen.h> #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 850c93f346c7..38ad1a1c4763 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -29,7 +29,7 @@ asmlinkage __visible unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = this_cpu_read(xen_vcpu); + vcpu = xh_default->xen_vcpu[smp_processor_id()]; /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -51,7 +51,7 @@ __visible void xen_restore_fl(unsigned long flags) /* See xen_irq_enable() for why preemption must be disabled. */ preempt_disable(); - vcpu = this_cpu_read(xen_vcpu); + vcpu = xh_default->xen_vcpu[smp_processor_id()]; vcpu->evtchn_upcall_mask = flags; if (flags == 0) { @@ -70,7 +70,7 @@ asmlinkage __visible void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; + xh_default->xen_vcpu[smp_processor_id()]->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); @@ -86,7 +86,7 @@ asmlinkage __visible void xen_irq_enable(void) */ preempt_disable(); - vcpu = this_cpu_read(xen_vcpu); + vcpu = xh_default->xen_vcpu[smp_processor_id()]; vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any @@ -111,7 +111,7 @@ static void xen_halt(void) { if (irqs_disabled()) HYPERVISOR_vcpu_op(VCPUOP_down, - xen_vcpu_nr(smp_processor_id()), NULL); + xen_vcpu_nr(xh_default, smp_processor_id()), NULL); else xen_safe_halt(); } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 0f4fe206dcc2..e99af51ab481 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1304,17 +1304,17 @@ static void __init xen_pagetable_init(void) } static void xen_write_cr2(unsigned long cr2) { - this_cpu_read(xen_vcpu)->arch.cr2 = cr2; + xh_default->xen_vcpu[smp_processor_id()]->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return this_cpu_read(xen_vcpu)->arch.cr2; + return xh_default->xen_vcpu[smp_processor_id()]->arch.cr2; } unsigned long xen_read_cr2_direct(void) { - return this_cpu_read(xen_vcpu_info.arch.cr2); + return xh_default->xen_vcpu_info[smp_processor_id()].arch.cr2; } static noinline void xen_flush_tlb(void) diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 33293ce01d8d..04f9b2e92f06 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -4,6 +4,7 @@ #include <linux/dma-mapping.h> #include <linux/pci.h> +#include <xen/interface/xen.h> #include <xen/swiotlb-xen.h> #include <asm/xen/hypervisor.h> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d5f303c0e656..ec8f22a54f6e 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -19,6 +19,7 @@ #include <asm/setup.h> #include <asm/acpi.h> #include <asm/numa.h> +#include <xen/interface/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7a43b2ae19f1..867524be0065 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -6,6 +6,7 @@ #include <linux/percpu.h> #include <xen/events.h> +#include <xen/xenhost.h> #include <xen/hvc-console.h> #include "xen-ops.h" @@ -129,7 +130,10 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) return; for_each_online_cpu(cpu) { - if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + xenhost_t **xh; + + if ((xen_vcpu_nr(xh_default, cpu) < MAX_VIRT_CPUS) && + (!xh_remote || (xen_vcpu_nr(xh_remote, cpu) < MAX_VIRT_CPUS))) continue; rc = cpu_down(cpu); @@ -138,7 +142,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) /* * Reset vcpu_info so this cpu cannot be onlined again. */ - xen_vcpu_info_reset(cpu); + for_each_xenhost(xh) + xen_vcpu_info_reset(*xh, cpu); count++; } else { pr_warn("%s: failed to bring CPU %d down, error %d\n", diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f8d39440b292..5e7f591bfdd9 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -9,6 +9,7 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) { + xenhost_t **xh; BUG_ON(smp_processor_id() != 0); native_smp_prepare_boot_cpu(); @@ -16,7 +17,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) * Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info * in xen_cpu_up_prepare_hvm(). */ - xen_vcpu_setup(0); + for_each_xenhost(xh) + xen_vcpu_setup(*xh, 0); /* * The alternative logic (which patches the unlock/lock) runs before @@ -29,6 +31,7 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) { + xenhost_t **xh; int cpu; native_smp_prepare_cpus(max_cpus); @@ -36,12 +39,14 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) xen_init_lock_cpu(0); - for_each_possible_cpu(cpu) { - if (cpu == 0) - continue; + for_each_xenhost(xh) { + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; - /* Set default vcpu_id to make sure that we don't use cpu-0's */ - per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID; + /* Set default vcpu_id to make sure that we don't use cpu-0's */ + (*xh)->xen_vcpu_id[cpu] = XEN_VCPU_ID_INVALID; + } } } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 145506f9fdbe..6d9c3e6611ef 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -350,7 +350,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(xh_default, cpu), ctxt)) BUG(); kfree(ctxt); @@ -374,7 +374,7 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; /* make sure interrupts start blocked */ - per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; + xh_default->xen_vcpu[cpu]->evtchn_upcall_mask = 1; rc = cpu_initialize_context(cpu, idle); if (rc) @@ -382,7 +382,7 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_pmu_init(cpu); - rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); + rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(xh_default, cpu), NULL); BUG_ON(rc); while (cpu_report_state(cpu) != CPU_ONLINE) @@ -407,7 +407,7 @@ static int xen_pv_cpu_disable(void) static void xen_pv_cpu_die(unsigned int cpu) { while (HYPERVISOR_vcpu_op(VCPUOP_is_up, - xen_vcpu_nr(cpu), NULL)) { + xen_vcpu_nr(xh_default, cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } @@ -423,7 +423,7 @@ static void xen_pv_cpu_die(unsigned int cpu) static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ { play_dead_common(); - HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(xh_default, smp_processor_id()), NULL); cpu_bringup(); /* * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) @@ -464,7 +464,7 @@ static void stop_self(void *v) set_cpu_online(cpu, false); - HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(xh_default, cpu), NULL); BUG(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d4bb1f8b4f58..217bc4de07ee 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -18,12 +18,12 @@ #include <linux/timekeeper_internal.h> #include <asm/pvclock.h> +#include <xen/interface/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> #include <xen/events.h> #include <xen/features.h> -#include <xen/interface/xen.h> #include <xen/interface/vcpu.h> #include "xen-ops.h" @@ -48,7 +48,7 @@ static u64 xen_clocksource_read(void) u64 ret; preempt_disable_notrace(); - src = &__this_cpu_read(xen_vcpu)->time; + src = &xh_default->xen_vcpu[smp_processor_id()]->time; ret = pvclock_clocksource_read(src); preempt_enable_notrace(); return ret; @@ -70,9 +70,10 @@ static void xen_read_wallclock(struct timespec64 *ts) struct pvclock_wall_clock *wall_clock = &(s->wc); struct pvclock_vcpu_time_info *vcpu_time; - vcpu_time = &get_cpu_var(xen_vcpu)->time; + preempt_disable_notrace(); + vcpu_time = &xh_default->xen_vcpu[smp_processor_id()]->time; pvclock_read_wallclock(wall_clock, vcpu_time, ts); - put_cpu_var(xen_vcpu); + preempt_enable_notrace(); } static void xen_get_wallclock(struct timespec64 *now) @@ -233,9 +234,9 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu), + if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(xh_default, cpu), NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(xh_default, cpu), NULL)) BUG(); @@ -246,7 +247,7 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(xh_default, cpu), NULL)) BUG(); @@ -266,7 +267,7 @@ static int xen_vcpuop_set_next_event(unsigned long delta, /* Get an event anyway, even if the timeout is already expired */ single.flags = 0; - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu), + ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(xh_default, cpu), &single); BUG_ON(ret != 0); @@ -366,7 +367,7 @@ void xen_timer_resume(void) for_each_online_cpu(cpu) { if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, - xen_vcpu_nr(cpu), NULL)) + xen_vcpu_nr(xh_default, cpu), NULL)) BUG(); } } @@ -482,7 +483,7 @@ static void __init xen_time_init(void) clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(xh_default, cpu), NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the vcpuop-based timer interface */ @@ -500,7 +501,7 @@ static void __init xen_time_init(void) * We check ahead on the primary time info if this * bit is supported hence speeding up Xen clocksource. */ - pvti = &__this_cpu_read(xen_vcpu)->time; + pvti = &xh_default->xen_vcpu[smp_processor_id()]->time; if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) { pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); xen_setup_vsyscall_time_info(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 5085ce88a8d7..96fd7edea7e9 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -22,7 +22,6 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); @@ -76,8 +75,8 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; -int xen_vcpu_setup(int cpu); -void xen_vcpu_info_reset(int cpu); +int xen_vcpu_setup(xenhost_t *xh, int cpu); +void xen_vcpu_info_reset(xenhost_t *xh, int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 117e76b2f939..ae497876fe41 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -884,7 +884,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq_set_chip_and_handler_name(irq, &xen_percpu_chip, handle_percpu_irq, "ipi"); - bind_ipi.vcpu = xen_vcpu_nr(cpu); + bind_ipi.vcpu = xen_vcpu_nr(xh_default, cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0) BUG(); @@ -937,7 +937,7 @@ static int find_virq(unsigned int virq, unsigned int cpu) continue; if (status.status != EVTCHNSTAT_virq) continue; - if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { + if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(xh_default, cpu)) { rc = port; break; } @@ -980,7 +980,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) handle_edge_irq, "virq"); bind_virq.virq = virq; - bind_virq.vcpu = xen_vcpu_nr(cpu); + bind_virq.vcpu = xen_vcpu_nr(xh_default, cpu); ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (ret == 0) @@ -1200,7 +1200,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) #ifdef CONFIG_X86 if (unlikely(vector == XEN_NMI_VECTOR)) { - int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu), + int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(xh_default, cpu), NULL); if (rc < 0) printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); @@ -1306,7 +1306,7 @@ int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu) /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; - bind_vcpu.vcpu = xen_vcpu_nr(tcpu); + bind_vcpu.vcpu = xen_vcpu_nr(xh_default, tcpu); /* * Mask the event while changing the VCPU binding to prevent @@ -1451,7 +1451,7 @@ static void restore_cpu_virqs(unsigned int cpu) /* Get a new binding from Xen. */ bind_virq.virq = virq; - bind_virq.vcpu = xen_vcpu_nr(cpu); + bind_virq.vcpu = xen_vcpu_nr(xh_default, cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq) != 0) BUG(); @@ -1475,7 +1475,7 @@ static void restore_cpu_ipis(unsigned int cpu) BUG_ON(ipi_from_irq(irq) != ipi); /* Get a new binding from Xen. */ - bind_ipi.vcpu = xen_vcpu_nr(cpu); + bind_ipi.vcpu = xen_vcpu_nr(xh_default, cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0) BUG(); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 76b318e88382..eed766219dd0 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -113,7 +113,7 @@ static int init_control_block(int cpu, init_control.control_gfn = virt_to_gfn(control_block); init_control.offset = 0; - init_control.vcpu = xen_vcpu_nr(cpu); + init_control.vcpu = xen_vcpu_nr(xh_default, cpu); return HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control); } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 6d1a5e58968f..66622109f2be 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -475,7 +475,7 @@ static long evtchn_ioctl(struct file *file, break; bind_virq.virq = bind.virq; - bind_virq.vcpu = xen_vcpu_nr(0); + bind_virq.vcpu = xen_vcpu_nr(xh_default, 0); rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (rc != 0) diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 0968859c29d0..feee74bbab0a 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -164,7 +164,7 @@ void xen_setup_runstate_info(int cpu) area.addr.v = &per_cpu(xen_runstate, cpu); if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - xen_vcpu_nr(cpu), &area)) + xen_vcpu_nr(xh_default, cpu), &area)) BUG(); } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 4969817124a8..75be9059893f 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -9,12 +9,9 @@ #include <asm/xen/interface.h> #include <xen/interface/vcpu.h> -DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); - -DECLARE_PER_CPU(uint32_t, xen_vcpu_id); -static inline uint32_t xen_vcpu_nr(int cpu) +static inline uint32_t xen_vcpu_nr(xenhost_t *xh, int cpu) { - return per_cpu(xen_vcpu_id, cpu); + return xh->xen_vcpu_id[cpu]; } #define XEN_VCPU_ID_INVALID U32_MAX diff --git a/include/xen/xenhost.h b/include/xen/xenhost.h index 7c19c361d16e..f6092a8987f1 100644 --- a/include/xen/xenhost.h +++ b/include/xen/xenhost.h @@ -90,6 +90,28 @@ typedef struct { struct shared_info *HYPERVISOR_shared_info; unsigned long shared_info_pfn; }; + + struct { + /* + * Events on xen-evtchn ports show up in struct vcpu_info. + * With multiple xenhosts, the evtchn-port numbering space that + * was global so far is now attached to a xenhost. + * + * So, now we allocate vcpu_info for each processor (we had space + * for only MAX_VIRT_CPUS in the shared_info above.) + * + * FIXME we statically allocate for NR_CPUS because alloc_percpu() + * isn't available at PV boot time but this is slow. + */ + struct vcpu_info xen_vcpu_info[NR_CPUS]; + struct vcpu_info *xen_vcpu[NR_CPUS]; + + /* + * Different xenhosts might have different Linux <-> Xen vCPU-id + * mapping. + */ + uint32_t xen_vcpu_id[NR_CPUS]; + }; } xenhost_t; typedef struct xenhost_ops { @@ -139,6 +161,26 @@ typedef struct xenhost_ops { */ void (*setup_shared_info)(xenhost_t *xenhost); void (*reset_shared_info)(xenhost_t *xenhost); + + /* + * vcpu_info, vcpu_id: needs to be setup early -- all IRQ code accesses + * relevant bits. + * + * vcpu_id is probed on PVH/PVHVM via xen_cpuid(). For PV, its direct + * mapped to smp_processor_id(). + * + * This is part of xenhost_t because we might be registered with two + * different xenhosts and both of those might have their own vcpu + * numbering. + * + * After the vcpu numbering is identified, we can go ahead and register + * vcpu_info with the xenhost; on the default xenhost this happens via + * the register_vcpu_info hypercall. + * + * Once vcpu_info is setup (this or the shared_info version), it would + * get accessed via pv_ops.irq.* and the evtchn logic. + */ + void (*probe_vcpu_id)(xenhost_t *xenhost, int cpu); } xenhost_ops_t; extern xenhost_t *xh_default, *xh_remote; @@ -185,4 +227,9 @@ static inline void xenhost_reset_shared_info(xenhost_t *xh) (xh->ops->reset_shared_info)(xh); } +static inline void xenhost_probe_vcpu_id(xenhost_t *xh, int cpu) +{ + (xh->ops->probe_vcpu_id)(xh, cpu); +} + #endif /* __XENHOST_H */

[RFC,07/16] x86/xen: make vcpu_info part of xenhost_t

Commit Message

Comments

Patch