@@ -34,7 +34,7 @@ obj-$(CONFIG_XEN_PV) += enlighten_pv.o
obj-$(CONFIG_XEN_PV) += mmu_pv.o
obj-$(CONFIG_XEN_PV) += irq.o
obj-$(CONFIG_XEN_PV) += multicalls.o
-obj-$(CONFIG_XEN_PV) += xen-asm.o
+obj-n += xen-asm.o
obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
@@ -996,8 +996,9 @@ void __init xen_setup_vcpu_info_placement(void)
* xen_vcpu_setup managed to place the vcpu_info within the
* percpu area for all cpus, so make use of it.
*/
+#if 0
+ /* Disable direct access for now. */
if (xen_have_vcpu_info_placement && false) {
- /* Disable direct access until we have proper pcpu data structures. */
pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_ops.irq.restore_fl =
__PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
@@ -1007,6 +1008,7 @@ void __init xen_setup_vcpu_info_placement(void)
__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
}
+#endif
}
static const struct pv_info xen_info __initconst = {
@@ -19,9 +19,9 @@
* callback mask. We do this in a very simple manner, by making a call
* down into Xen. The pending flag will be checked by Xen on return.
*/
-void xen_force_evtchn_callback(void)
+void xen_force_evtchn_callback(xenhost_t *xh)
{
- (void)HYPERVISOR_xen_version(0, NULL);
+ (void)hypervisor_xen_version(xh, 0, NULL);
}
asmlinkage __visible unsigned long xen_save_fl(void)
@@ -29,6 +29,21 @@ asmlinkage __visible unsigned long xen_save_fl(void)
struct vcpu_info *vcpu;
unsigned long flags;
+ /*
+ * In scenarios with more than one xenhost, the primary xenhost
+ * is responsible for all the upcalls, with the remote xenhost
+ * bouncing its upcalls through it (see comment in
+ * cpu_initialize_context().)
+ *
+ * To minimize unnecessary upcalls, the remote xenhost still looks at
+ * the value of vcpu_info->evtchn_upcall_mask, so we still set and reset
+ * that.
+ *
+ * The fact that the upcall itself is gated by the default xenhost,
+ * also helps in simplifying the logic here because we don't have to
+ * worry about guaranteeing atomicity with updates to
+ * xh_remote->vcpu_info->evtchn_upcall_mask.
+ */
vcpu = xh_default->xen_vcpu[smp_processor_id()];
/* flag has opposite sense of mask */
@@ -38,26 +53,34 @@ asmlinkage __visible unsigned long xen_save_fl(void)
-0 -> 0x00000000
-1 -> 0xffffffff
*/
- return (-flags) & X86_EFLAGS_IF;
+ return ((-flags) & X86_EFLAGS_IF);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
__visible void xen_restore_fl(unsigned long flags)
{
struct vcpu_info *vcpu;
+ xenhost_t **xh;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* See xen_irq_enable() for why preemption must be disabled. */
preempt_disable();
- vcpu = xh_default->xen_vcpu[smp_processor_id()];
- vcpu->evtchn_upcall_mask = flags;
+ for_each_xenhost(xh) {
+ vcpu = (*xh)->xen_vcpu[smp_processor_id()];
+ vcpu->evtchn_upcall_mask = flags;
+ }
if (flags == 0) {
barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- xen_force_evtchn_callback();
+ for_each_xenhost(xh) {
+ /* Preemption is disabled so we should not have
+ * gotten moved to a different VCPU. */
+ vcpu = (*xh)->xen_vcpu[smp_processor_id()];
+ if (unlikely(vcpu->evtchn_upcall_pending))
+ xen_force_evtchn_callback(*xh);
+ }
preempt_enable();
} else
preempt_enable_no_resched();
@@ -66,11 +89,19 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
asmlinkage __visible void xen_irq_disable(void)
{
+ xenhost_t **xh;
+
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
- xh_default->xen_vcpu[smp_processor_id()]->evtchn_upcall_mask = 1;
+ for_each_xenhost(xh)
+ /*
+ * Mask events on this CPU for both the xenhosts. As the
+ * comment above mentions, disabling preemption means we
+ * can safely do that.
+ */
+ (*xh)->xen_vcpu[smp_processor_id()]->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
@@ -78,6 +109,7 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
asmlinkage __visible void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
+ xenhost_t **xh;
/*
* We may be preempted as soon as vcpu->evtchn_upcall_mask is
@@ -86,16 +118,25 @@ asmlinkage __visible void xen_irq_enable(void)
*/
preempt_disable();
- vcpu = xh_default->xen_vcpu[smp_processor_id()];
- vcpu->evtchn_upcall_mask = 0;
+ /* Given that the interrupts are generated from the default xenhost,
+ * we should do this in reverse order. NOTE(review): the loop below
+ * uses the same for_each_xenhost() as the masking paths -- confirm. */
+ for_each_xenhost(xh) {
+ vcpu = (*xh)->xen_vcpu[smp_processor_id()];
+ vcpu->evtchn_upcall_mask = 0;
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
+ /* We could get preempted by an incoming interrupt here with a
+ * half enabled irq (for the first xenhost.)
+ */
+ }
barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- xen_force_evtchn_callback();
+ for_each_xenhost(xh) {
+ vcpu = (*xh)->xen_vcpu[smp_processor_id()];
+ if (unlikely(vcpu->evtchn_upcall_pending))
+ xen_force_evtchn_callback(*xh);
+ }
preempt_enable();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
@@ -343,6 +343,17 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
+ /*
+ * We set up an upcall handler only for the default xenhost. The remote
+ * xenhost will generate evtchn events, but an additional callback would be
+ * quite hairy, since we would have VCPU state initialised in multiple
+ * hypervisors and issues like re-entrancy of upcalls.
+ *
+ * It would be simpler if the callback from L0-Xen could be bounced
+ * via L1-Xen. This also simplifies the pv_irq_ops code
+ * because now the CPU's IF processing only needs to happen on
+ * xh_default->vcpu_info.
+ */
ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
For configurations with multiple xenhosts, we need to handle events generated from multiple xenhosts. Having more than one upcall handler might be quite hairy, and it would be simpler if the callback from L0-Xen could be bounced via L1-Xen. This will also mean simpler pv_irq_ops code because now the IF flag maps onto the xh_default->vcpu_info->evtchn_upcall_mask. However, we still update the xh_remote->vcpu_info->evtchn_upcall_mask on a best effort basis to minimize unnecessary work in remote xenhost. TODO: - direct pv_ops.irq are disabled. Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com> --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten_pv.c | 4 ++- arch/x86/xen/irq.c | 69 +++++++++++++++++++++++++++++-------- arch/x86/xen/smp_pv.c | 11 ++++++ 4 files changed, 70 insertions(+), 16 deletions(-)