@@ -252,6 +252,19 @@ config X86_DEBUG_FPU
If unsure, say N.
+config DEBUG_PARAVIRT_SELFTEST
+ bool "Enable paravirt runtime selftest"
+ depends on PARAVIRT
+ depends on PARAVIRT_RUNTIME
+ depends on PARAVIRT_SPINLOCKS
+ depends on KVM_GUEST
+ help
+ This option enables sanity testing of the runtime paravirtualized
+ patching code. Triggered via debugfs.
+
+ Might help diagnose patching problems in different
+ configurations and loads.
+
config PUNIT_ATOM_DEBUG
tristate "ATOM Punit debug driver"
depends on PCI
@@ -114,6 +114,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
+obj-$(CONFIG_DEBUG_PARAVIRT_SELFTEST) += pv_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
@@ -26,6 +26,7 @@
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
+#include "pv_selftest.h"
int __read_mostly alternatives_patched;
@@ -1549,6 +1550,12 @@ static void __maybe_unused text_poke_site(struct text_poke_state *tps,
*/
poke_sync(tps, PATCH_SYNC_0, offset, &int3, INT3_INSN_SIZE);
+ /*
+ * We have an INT3 in place; execute a contrived selftest that
+ * has an insn sequence that is under patching.
+ */
+ pv_selftest_primary();
+
/* Poke remaining */
poke_sync(tps, PATCH_SYNC_1, offset + INT3_INSN_SIZE,
tp->text + INT3_INSN_SIZE, tp->native.len - INT3_INSN_SIZE);
@@ -1634,6 +1641,19 @@ static void text_poke_sync_site(struct text_poke_state *tps)
smp_cond_load_acquire(&tps->state,
prevstate != VAL);
+ /*
+ * Send an NMI to one of the other CPUs.
+ */
+ pv_selftest_send_nmi();
+
+ /*
+ * We have an INT3 in place; execute a contrived selftest that
+ * has an insn sequence that is under patching.
+ *
+ * Note that this function is also called from BP fixup but
+ * is just an NOP when called from there.
+ */
+ pv_selftest_secondary();
prevstate = READ_ONCE(tps->state);
/*
@@ -779,7 +779,7 @@ arch_initcall(kvm_alloc_cpumask);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
-static void kvm_kick_cpu(int cpu)
+void kvm_kick_cpu(int cpu)
{
int apicid;
unsigned long flags = 0;
@@ -790,7 +790,7 @@ static void kvm_kick_cpu(int cpu)
#include <asm/qspinlock.h>
-static void kvm_wait(u8 *ptr, u8 val)
+void kvm_wait(u8 *ptr, u8 val)
{
unsigned long flags;
new file mode 100644
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/memory.h>
+#include <linux/nmi.h>
+#include <linux/uaccess.h>
+#include <asm/apic.h>
+#include <asm/text-patching.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_types.h>
+#include "pv_selftest.h"
+
+static int nmi_selftest;
+static bool cond_state;
+
+#define SELFTEST_PARAVIRT 1
+static int test_mode;
+
+/*
+ * Mark this and the following functions __always_inline to ensure
+ * we generate multiple patch sites that can be hit independently
+ * in thread, NMI etc contexts.
+ */
+static __always_inline void selftest_pv(void)
+{
+ struct qspinlock test;
+
+ memset(&test, 0, sizeof(test));
+
+ test.locked = _Q_LOCKED_VAL;
+
+ /*
+ * Sits directly in the path of the test.
+ *
+ * The primary sets up an INT3 instruction at pv_queued_spin_unlock().
+ * Both the primary and secondary CPUs should hit that in both
+ * thread and NMI contexts.
+ *
+ * Additionally, this also gets inlined in nmi_pv_callback() so we
+ * should hit this with nmi_selftest.
+ *
+ * The fixup takes place in poke_int3_native().
+ */
+ pv_queued_spin_unlock(&test);
+}
+
+static __always_inline void patch_selftest(void)
+{
+ if (test_mode == SELFTEST_PARAVIRT)
+ selftest_pv();
+}
+
+static DEFINE_PER_CPU(int, selftest_count);
+void pv_selftest_secondary(void)
+{
+ /*
+ * On the secondary we execute the same code in both the
+ * thread-context and the BP-context and so would hit this
+ * recursively if we do inside the fixup context.
+ *
+ * So we trigger the selftest only if it's not ongoing already
+ * (thus allowing the thread or NMI context, but excluding
+ * the INT3 handling path.)
+ */
+ if (this_cpu_read(selftest_count))
+ return;
+
+ this_cpu_inc(selftest_count);
+
+ patch_selftest();
+
+ this_cpu_dec(selftest_count);
+}
+
+void pv_selftest_primary(void)
+{
+ patch_selftest();
+}
+
+/*
+ * NMI handler registered for the duration of the selftest. Runs for
+ * every local NMI; does nothing unless nmi_selftest > 0:
+ * - nmi_selftest >= 1: execute a pv-op that may be under patching
+ * - nmi_selftest >= 2: also take a spinlock pair from NMI context
+ * - nmi_selftest >= 3: dilate the critical section to force contention
+ */
+static int nmi_callback(unsigned int val, struct pt_regs *regs)
+{
+	static DEFINE_SPINLOCK(nmi_spin);
+
+	if (!nmi_selftest)
+		goto out;
+
+	patch_selftest();
+
+	if (nmi_selftest >= 2) {
+		/*
+		 * Depending on whether CONFIG_[UN]INLINE_SPIN_* are
+		 * defined or not, these would get patched or just
+		 * create race conditions when exercised via NMIs.
+		 */
+		spin_lock(&nmi_spin);
+
+		/* Dilate the critical section to force contention. */
+		if (nmi_selftest >= 3)
+			udelay(1);
+
+		spin_unlock(&nmi_spin);
+	}
+
+	/*
+	 * nmi_selftest > 0: claim the NMI unconditionally; ideally we
+	 * would use a bitmap to check whether this NMI really was
+	 * destined for us or not.
+	 */
+	return NMI_HANDLED;
+out:
+	return NMI_DONE;
+}
+
+void pv_selftest_register(void)
+{
+ register_nmi_handler(NMI_LOCAL, nmi_callback,
+ 0, "paravirt_nmi_selftest");
+}
+
+void pv_selftest_unregister(void)
+{
+ unregister_nmi_handler(NMI_LOCAL, "paravirt_nmi_selftest");
+}
+
+/* Kick the next online CPU with an NMI so it hits the patch site. */
+void pv_selftest_send_nmi(void)
+{
+	int cpu = smp_processor_id();
+	/* Only from thread context (not from the NMI or INT3 paths). */
+	if (nmi_selftest && !in_interrupt())
+		apic->send_IPI((cpu + 1) % num_online_cpus(), NMI_VECTOR);
+}
+
+/*
+ * Just declare these locally here instead of having them be
+ * exposed to the whole world.
+ */
+void kvm_wait(u8 *ptr, u8 val);
+void kvm_kick_cpu(int cpu);
+bool __raw_callee_save___kvm_vcpu_is_preempted(long cpu);
+static void pv_spinlocks(void)
+{
+ paravirt_stage_alt(cond_state,
+ lock.queued_spin_lock_slowpath,
+ __pv_queued_spin_lock_slowpath);
+ paravirt_stage_alt(cond_state, lock.queued_spin_unlock.func,
+ PV_CALLEE_SAVE(__pv_queued_spin_unlock).func);
+ paravirt_stage_alt(cond_state, lock.wait, kvm_wait);
+ paravirt_stage_alt(cond_state, lock.kick, kvm_kick_cpu);
+
+ paravirt_stage_alt(cond_state,
+ lock.vcpu_is_preempted.func,
+ PV_CALLEE_SAVE(__kvm_vcpu_is_preempted).func);
+}
+
+void pv_trigger(void)
+{
+ bool nmi_mode = nmi_selftest ? true : false;
+ int ret;
+
+ pr_debug("%s: nmi=%d; NMI-mode=%d\n", __func__, nmi_selftest, nmi_mode);
+
+ mutex_lock(&text_mutex);
+
+ paravirt_stage_zero();
+ pv_spinlocks();
+
+ /*
+ * paravirt patching for pv_locks can potentially deadlock
+ * if we are running with nmi_mode=false and we get an NMI.
+ *
+ * For the sake of testing that path, we risk it. However, if
+ * we are generating synthetic NMIs (nmi_selftest > 0) then
+ * run with nmi_mode=true.
+ */
+ ret = paravirt_runtime_patch(nmi_mode);
+
+ /*
+ * Flip the state so we switch the pv_lock_ops on the next test.
+ */
+ cond_state = !cond_state;
+
+ mutex_unlock(&text_mutex);
+
+ pr_debug("%s: nmi=%d; NMI-mode=%d, ret=%d\n", __func__, nmi_selftest,
+ nmi_mode, ret);
+}
+
+static void pv_selftest_trigger(void)
+{
+ test_mode = SELFTEST_PARAVIRT;
+ pv_trigger();
+}
+
+static ssize_t pv_selftest_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ pv_selftest_register();
+ pv_selftest_trigger();
+ pv_selftest_unregister();
+
+ return count;
+}
+
+static ssize_t pv_nmi_read(struct file *file, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ char buf[32];
+ unsigned int len;
+
+ len = snprintf(buf, sizeof(buf), "%d\n", nmi_selftest);
+ return simple_read_from_buffer(ubuf, count, ppos, buf, len);
+}
+
+/* debugfs write: set nmi_selftest, clamped to the supported range 0..3. */
+static ssize_t pv_nmi_write(struct file *file, const char __user *ubuf,
+			   size_t count, loff_t *ppos)
+{
+	char buf[32];
+	unsigned int len;
+	int enabled;
+
+	len = min(sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, len))
+		return -EFAULT;
+
+	buf[len] = '\0';
+	if (kstrtoint(buf, 0, &enabled))
+		return -EINVAL;
+
+	/* Reject negatives too; enabled is signed to match kstrtoint(). */
+	nmi_selftest = clamp(enabled, 0, 3);
+
+	return count;
+}
+
+static const struct file_operations pv_selftest_fops = {
+ .read = NULL,
+ .write = pv_selftest_write,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations pv_nmi_fops = {
+ .read = pv_nmi_read,
+ .write = pv_nmi_write,
+ .llseek = default_llseek,
+};
+
+static int __init pv_selftest_init(void)
+{
+ struct dentry *d = debugfs_create_dir("pv_selftest", NULL);
+
+ debugfs_create_file("toggle", 0600, d, NULL, &pv_selftest_fops);
+ debugfs_create_file("nmi", 0600, d, NULL, &pv_nmi_fops);
+
+ return 0;
+}
+
+late_initcall(pv_selftest_init);
new file mode 100644
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PV_SELFTEST_H
+#define _PV_SELFTEST_H
+
+#ifdef CONFIG_DEBUG_PARAVIRT_SELFTEST
+void pv_selftest_send_nmi(void);
+void pv_selftest_primary(void);
+void pv_selftest_secondary(void);
+#else
+static inline void pv_selftest_send_nmi(void) { }
+static inline void pv_selftest_primary(void) { }
+static inline void pv_selftest_secondary(void) { }
+#endif /* !CONFIG_DEBUG_PARAVIRT_SELFTEST */
+
+#endif /* _PV_SELFTEST_H */
Add a selftest that triggers paravirt_runtime_patch() which toggles between the paravirt and native pv_lock_ops. The selftest also registers an NMI handler, which exercises the patched pv-ops via spin-lock operations. These are triggered via artificially sent NMIs. And last, introduce patch sites in the primary and secondary patching code which are hit during the patching process. Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com> --- arch/x86/Kconfig.debug | 13 ++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/alternative.c | 20 +++ arch/x86/kernel/kvm.c | 4 +- arch/x86/kernel/pv_selftest.c | 264 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/pv_selftest.h | 15 ++ 6 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/pv_selftest.c create mode 100644 arch/x86/kernel/pv_selftest.h