
[v3,07/23] xsplice: Implement support for applying/reverting/replacing patches. (v5)

Message ID 1455300361-13092-8-git-send-email-konrad.wilk@oracle.com (mailing list archive)
State New, archived

Commit Message

Konrad Rzeszutek Wilk Feb. 12, 2016, 6:05 p.m. UTC
From: Ross Lagerwall <ross.lagerwall@citrix.com>

Implement support for the apply, revert and replace actions.

To perform an action on a payload, the hypercall sets up a data
structure to schedule the work.  A hook is added in all the
return-to-guest paths to check for work to do and execute it if needed.
In this way, patches can be applied with all CPUs idle and without
stacks.  The first CPU to do_xsplice() becomes the master and triggers a
reschedule softirq to trigger all the other CPUs to enter do_xsplice()
with no stack.  Once all CPUs have rendezvoused, all CPUs disable IRQs
and NMIs are ignored. The system is then quiescent and the master
performs the action.  After this, all CPUs enable IRQs and NMIs are
re-enabled.

The action to perform is one of:
- APPLY: For each function in the module, store the first 5 bytes of the
  old function and replace it with a jump to the new function.
- REVERT: Copy the previously stored bytes into the first 5 bytes of the
  old function.
- REPLACE: Revert each applied module and then apply the new module.

To prevent a deadlock with any other barrier in the system, the master
will wait for up to 30ms before timing out.  I've taken some
measurements and found the patch application to take about 100 µs on a
72 CPU system, whether idle or fully loaded.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
--
v2: - Pluck the 'struct xsplice_patch_func' in this patch.
    - Modify code per review comments.
    - Add more data in the keyboard handler.
    - Redo the patching code, split it in functions.
v3: - Add return_ macro for debug builds.
    - Move s/payload_list_lock/payload_list/ to earlier patch
    - Remove const and use ELF types for xsplice_patch_func
v4: - Add check routine to do simple sanity checks for various
      sections.
v5: - s/%p/PRIx64/ as ARM builds complain.
---
 xen/arch/arm/xsplice.c      |  10 +-
 xen/arch/x86/domain.c       |   4 +
 xen/arch/x86/hvm/svm/svm.c  |   2 +
 xen/arch/x86/hvm/vmx/vmcs.c |   2 +
 xen/arch/x86/xsplice.c      |  19 +++
 xen/common/xsplice.c        | 372 ++++++++++++++++++++++++++++++++++++++++++--
 xen/common/xsplice_elf.c    |   8 +-
 xen/include/asm-arm/nmi.h   |  13 ++
 xen/include/xen/xsplice.h   |  21 +++
 9 files changed, 432 insertions(+), 19 deletions(-)

Comments

Andrew Cooper Feb. 16, 2016, 7:11 p.m. UTC | #1
On 12/02/16 18:05, Konrad Rzeszutek Wilk wrote:
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index 9d43f7b..b5995b9 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -36,6 +36,7 @@
>  #include <xen/cpu.h>
>  #include <xen/wait.h>
>  #include <xen/guest_access.h>
> +#include <xen/xsplice.h>
>  #include <public/sysctl.h>
>  #include <public/hvm/hvm_vcpu.h>
>  #include <asm/regs.h>
> @@ -121,6 +122,7 @@ static void idle_loop(void)
>          (*pm_idle)();
>          do_tasklet();
>          do_softirq();
> +        do_xsplice(); /* Must be last. */

Then name "do_xsplice()" is slightly misleading (although it is in equal
company here).  check_for_xsplice_work() would be more accurate.

> diff --git a/xen/arch/x86/xsplice.c b/xen/arch/x86/xsplice.c
> index 814dd52..ae35e91 100644
> --- a/xen/arch/x86/xsplice.c
> +++ b/xen/arch/x86/xsplice.c
> @@ -10,6 +10,25 @@
>                              __func__,__LINE__, x); return x; }
>  #endif
>  
> +#define PATCH_INSN_SIZE 5

Somewhere you should have a BUILD_BUG_ON() confirming that
PATCH_INSN_SIZE fits within the undo array.

Having said that, I think all of xsplice_patch_func should be
arch-specific rather than generic.
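
Something along these lines, e.g. at the top of xsplice_apply_jmp(), is what
I have in mind (just a sketch; undo[] being the array in
struct xsplice_patch_func):

    /* Compile-time check: the patch jump must fit in the saved-bytes buffer. */
    BUILD_BUG_ON(PATCH_INSN_SIZE >
                 sizeof(((struct xsplice_patch_func *)0)->undo));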

> +
> +void xsplice_apply_jmp(struct xsplice_patch_func *func)
> +{
> +    uint32_t val;
> +    uint8_t *old_ptr;
> +
> +    old_ptr = (uint8_t *)func->old_addr;
> +    memcpy(func->undo, old_ptr, PATCH_INSN_SIZE);

At least a newline here please.

> +    *old_ptr++ = 0xe9; /* Relative jump */
> +    val = func->new_addr - func->old_addr - PATCH_INSN_SIZE;

E9 takes a rel32 parameter, which is signed.

I think you need to explicitly cast to intptr_t and used signed
arithmetic throughout this calculation to correctly calculate a
backwards jump.

I think there should also be some sanity checks that both old_addr and
new_addr are in the Xen 1G virtual region.
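
Roughly what I have in mind (a sketch only, not a drop-in replacement; the
1G-region bounds via XEN_VIRT_START/XEN_VIRT_END and the error handling are
illustrative):

    int64_t delta = (int64_t)func->new_addr -
                    (int64_t)func->old_addr - PATCH_INSN_SIZE;
    int32_t rel32;

    ASSERT(delta == (int32_t)delta);     /* Must fit the E9 rel32 operand. */
    rel32 = delta;
    memcpy(old_ptr, &rel32, sizeof(rel32));

    /* And, at load time rather than apply time, something like: */
    if ( func->old_addr < XEN_VIRT_START || func->old_addr >= XEN_VIRT_END ||
         func->new_addr < XEN_VIRT_START || func->new_addr >= XEN_VIRT_END )
        return -EINVAL;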

> +    memcpy(old_ptr, &val, sizeof val);
> +}
> +
> +void xsplice_revert_jmp(struct xsplice_patch_func *func)
> +{
> +    memcpy((void *)func->old_addr, func->undo, PATCH_INSN_SIZE);

_p() is common shorthand in Xen for a cast to (void *)

> +}
> +
>  int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data)
>  {
>  
> diff --git a/xen/common/xsplice.c b/xen/common/xsplice.c
> index fbd6129..b854c0a 100644
> --- a/xen/common/xsplice.c
> +++ b/xen/common/xsplice.c
> @@ -3,6 +3,7 @@
>   *
>   */
>  
> +#include <xen/cpu.h>
>  #include <xen/guest_access.h>
>  #include <xen/keyhandler.h>
>  #include <xen/lib.h>
> @@ -10,16 +11,25 @@
>  #include <xen/mm.h>
>  #include <xen/sched.h>
>  #include <xen/smp.h>
> +#include <xen/softirq.h>
>  #include <xen/spinlock.h>
> +#include <xen/wait.h>
>  #include <xen/xsplice_elf.h>
>  #include <xen/xsplice.h>
>  
>  #include <asm/event.h>
> +#include <asm/nmi.h>
>  #include <public/sysctl.h>
>  
> +/*
> + * Protects against payload_list operations and also allows only one
> + * caller in schedule_work.
> + */
>  static DEFINE_SPINLOCK(payload_lock);
>  static LIST_HEAD(payload_list);
>  
> +static LIST_HEAD(applied_list);
> +
>  static unsigned int payload_cnt;
>  static unsigned int payload_version = 1;
>  
> @@ -29,6 +39,9 @@ struct payload {
>      struct list_head list;               /* Linked to 'payload_list'. */
>      void *payload_address;               /* Virtual address mapped. */
>      size_t payload_pages;                /* Nr of the pages. */
> +    struct list_head applied_list;       /* Linked to 'applied_list'. */
> +    struct xsplice_patch_func *funcs;    /* The array of functions to patch. */
> +    unsigned int nfuncs;                 /* Nr of functions to patch. */
>  
>      char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
>  };
> @@ -36,6 +49,24 @@ struct payload {
>  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len);
>  static void free_payload_data(struct payload *payload);
>  
> +/* Defines an outstanding patching action. */
> +struct xsplice_work
> +{
> +    atomic_t semaphore;          /* Used for rendezvous. First to grab it will
> +                                    do the patching. */
> +    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
> +    uint32_t timeout;                    /* Timeout to do the operation. */
> +    struct payload *data;        /* The payload on which to act. */
> +    volatile bool_t do_work;     /* Signals work to do. */
> +    volatile bool_t ready;       /* Signals all CPUs synchronized. */
> +    uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
> +};
> +
> +/* There can be only one outstanding patching action. */
> +static struct xsplice_work xsplice_work;

This is a scalability issue, specifically that every cpu on the
return-to-guest path polls the bytes making up do_work.  See below for
my suggestion to fix this.

> +
> +static int schedule_work(struct payload *data, uint32_t cmd, uint32_t timeout);
> +
>  static const char *state2str(int32_t state)
>  {
>  #define STATE(x) [XSPLICE_STATE_##x] = #x
> @@ -61,14 +92,23 @@ static const char *state2str(int32_t state)
>  static void xsplice_printall(unsigned char key)
>  {
>      struct payload *data;
> +    unsigned int i;
>  
>      spin_lock(&payload_lock);
>  
>      list_for_each_entry ( data, &payload_list, list )
> -        printk(" name=%s state=%s(%d) %p using %zu pages.\n", data->name,
> +    {
> +        printk(" name=%s state=%s(%d) %p using %zu pages:\n", data->name,
>                 state2str(data->state), data->state, data->payload_address,
>                 data->payload_pages);
>  
> +        for ( i = 0; i < data->nfuncs; i++ )
> +        {
> +            struct xsplice_patch_func *f = &(data->funcs[i]);
> +            printk("    %s patch 0x%"PRIx64"(%u) with 0x%"PRIx64"(%u)\n",
> +                   f->name, f->old_addr, f->old_size, f->new_addr, f->new_size);
> +        }

For a large patch, this could be thousands of entries.  You need to
periodically process pending softirqs to avoid a watchdog timeout.
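
i.e. something like this in the inner loop (a sketch; the batch size is
arbitrary, and holding payload_lock across it would need a rethink):

    for ( i = 0; i < data->nfuncs; i++ )
    {
        struct xsplice_patch_func *f = &data->funcs[i];

        printk("    %s patch 0x%"PRIx64"(%u) with 0x%"PRIx64"(%u)\n",
               f->name, f->old_addr, f->old_size, f->new_addr, f->new_size);

        if ( i && !(i % 64) )
            process_pending_softirqs();  /* Don't trip the watchdog. */
    }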

>  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
>  {
>      struct xsplice_elf elf;
> @@ -605,7 +695,14 @@ static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
>      if ( rc )
>          goto err_payload;
>  
> -    /* Free our temporary data structure. */
> +    rc = check_special_sections(payload, &elf);
> +    if ( rc )
> +        goto err_payload;
> +
> +    rc = find_special_sections(payload, &elf);
> +    if ( rc )
> +        goto err_payload;
> +
>      xsplice_elf_free(&elf);
>      return 0;
>  
> @@ -617,6 +714,253 @@ static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
>      return rc;
>  }
>  
> +
> +/*
> + * The following functions get the CPUs into an appropriate state and
> + * apply (or revert) each of the module's functions.
> + */
> +
> +/*
> + * This function is executed having all other CPUs with no stack (we may
> + * have cpu_idle on it) and IRQs disabled. We guard against NMI by temporarily
> + * installing our NOP NMI handler.
> + */
> +static int apply_payload(struct payload *data)
> +{
> +    unsigned int i;
> +
> +    printk(XENLOG_DEBUG "%s: Applying %u functions.\n", data->name,
> +           data->nfuncs);
> +
> +    for ( i = 0; i < data->nfuncs; i++ )
> +        xsplice_apply_jmp(data->funcs + i);

In cases such as this, it is better to use data->funcs[i], as the
compiler can more easily perform pointer alias analysis.

> +
> +    list_add_tail(&data->applied_list, &applied_list);
> +
> +    return 0;
> +}
> +
> +/*
> + * This function is executed having all other CPUs with no stack (we may
> + * have cpu_idle on it) and IRQs disabled.
> + */
> +static int revert_payload(struct payload *data)
> +{
> +    unsigned int i;
> +
> +    printk(XENLOG_DEBUG "%s: Reverting.\n", data->name);
> +
> +    for ( i = 0; i < data->nfuncs; i++ )
> +        xsplice_revert_jmp(data->funcs + i);
> +
> +    list_del(&data->applied_list);
> +
> +    return 0;
> +}
> +
> +/* Must be holding the payload_lock. */
> +static int schedule_work(struct payload *data, uint32_t cmd, uint32_t timeout)
> +{
> +    /* Fail if an operation is already scheduled. */
> +    if ( xsplice_work.do_work )
> +        return -EBUSY;
> +
> +    xsplice_work.cmd = cmd;
> +    xsplice_work.data = data;
> +    xsplice_work.timeout = timeout ? timeout : MILLISECS(30);

Can shorten to "timeout ?: MILLISECS(30);"

> +
> +    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
> +           xsplice_work.timeout / MILLISECS(1));
> +
> +    atomic_set(&xsplice_work.semaphore, -1);
> +    atomic_set(&xsplice_work.irq_semaphore, -1);
> +
> +    xsplice_work.ready = 0;
> +    smp_wmb();
> +    xsplice_work.do_work = 1;
> +    smp_wmb();
> +
> +    return 0;
> +}
> +
> +/*
> + * Note that because of this NOP code the do_nmi is not safely patchable.
> + * Also if we do receive 'real' NMIs we have lost them.

The MCE path needs consideration as well.  Unlike the NMI path however,
that one cannot be ignored.

In both cases, it might be best to see about raising a tasklet or
softirq to pick up some deferred work.

> + */
> +static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
> +{
> +    return 1;
> +}
> +
> +static void reschedule_fn(void *unused)
> +{
> +    smp_mb(); /* Synchronize with setting do_work */
> +    raise_softirq(SCHEDULE_SOFTIRQ);

As you have to IPI each processor to raise a schedule softirq, you can
set a per-cpu "xsplice enter rendezvous" variable.  This prevents the
need for the return-to-guest path to poll one single byte.
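
i.e. roughly (a sketch of the idea; the variable name is made up, and the
CPU initiating the work would set its own flag as well):

    static DEFINE_PER_CPU(bool_t, xsplice_work_pending);

    static void reschedule_fn(void *unused)
    {
        this_cpu(xsplice_work_pending) = 1;
        smp_mb(); /* Synchronize with setting do_work */
        raise_softirq(SCHEDULE_SOFTIRQ);
    }

    void check_for_xsplice_work(void)
    {
        /* Fast path now reads a per-cpu flag instead of the shared byte. */
        if ( likely(!this_cpu(xsplice_work_pending)) )
            return;
        this_cpu(xsplice_work_pending) = 0;

        /* ... rendezvous as before ... */
    }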

> +}
> +
> +static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
> +                           unsigned int total_cpus, const char *s)
> +{
> +    int rc = 0;
> +
> +    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
> +        cpu_relax();
> +
> +    /* Log & abort. */
> +    if ( atomic_read(counter) != total_cpus )
> +    {
> +        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
> +               s, atomic_read(counter), total_cpus);
> +        rc = -EBUSY;
> +        xsplice_work.data->rc = rc;
> +        xsplice_work.do_work = 0;
> +        smp_wmb();
> +        return rc;
> +    }
> +    return rc;
> +}
> +
> +static void xsplice_do_single(unsigned int total_cpus)
> +{
> +    nmi_callback_t saved_nmi_callback;
> +    struct payload *data, *tmp;
> +    s_time_t timeout;
> +    int rc;
> +
> +    data = xsplice_work.data;
> +    timeout = xsplice_work.timeout + NOW();
> +    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
> +                         "Timed out on CPU semaphore") )
> +        return;
> +
> +    /* "Mask" NMIs. */
> +    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
> +
> +    /* All CPUs are waiting, now signal to disable IRQs. */
> +    xsplice_work.ready = 1;
> +    smp_wmb();
> +
> +    atomic_inc(&xsplice_work.irq_semaphore);
> +    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
> +                         "Timed out on IRQ semaphore.") )

This path "leaks" the NMI mask.  It would be better use "goto out;"
handling in the function to make it clearer that the error paths are
taken care of.

> +        return;
> +
> +    local_irq_disable();

local_irq_save().  Easier to restore on an error path.
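
i.e. shaped something like this (only a sketch; wait_until_irqs_off() and
do_the_action() stand in for the real waits and the switch below):

    static void xsplice_do_single(unsigned int total_cpus)
    {
        nmi_callback_t saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
        unsigned long flags;

        if ( wait_until_irqs_off(total_cpus) )
            goto out;              /* Error path still restores the callback. */

        local_irq_save(flags);
        do_the_action();
        local_irq_restore(flags);

     out:
        set_nmi_callback(saved_nmi_callback);
    }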

> +    /* Now this function should be the only one on any stack.
> +     * No need to lock the payload list or applied list. */
> +    switch ( xsplice_work.cmd )
> +    {
> +    case XSPLICE_ACTION_APPLY:
> +        rc = apply_payload(data);
> +        if ( rc == 0 )
> +            data->state = XSPLICE_STATE_APPLIED;
> +        break;
> +    case XSPLICE_ACTION_REVERT:
> +        rc = revert_payload(data);
> +        if ( rc == 0 )
> +            data->state = XSPLICE_STATE_CHECKED;
> +        break;
> +    case XSPLICE_ACTION_REPLACE:
> +        list_for_each_entry_safe_reverse ( data, tmp, &applied_list, list )
> +        {
> +            data->rc = revert_payload(data);
> +            if ( data->rc == 0 )
> +                data->state = XSPLICE_STATE_CHECKED;
> +            else
> +            {
> +                rc = -EINVAL;
> +                break;
> +            }
> +        }
> +        if ( rc != -EINVAL )
> +        {
> +            rc = apply_payload(xsplice_work.data);
> +            if ( rc == 0 )
> +                xsplice_work.data->state = XSPLICE_STATE_APPLIED;
> +        }
> +        break;
> +    default:
> +        rc = -EINVAL;
> +        break;
> +    }

Once code modification is complete, you must execute a serialising
instruction such as cpuid, to flush the pipeline, on all cpus.  (Refer
to Intel SDM 3 8.1.4 "Handling Self- and Cross-Modifying Code")
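
e.g. each CPU could run something like this on its way out of the rendezvous
(a sketch; whether the slaves do it before or after re-enabling IRQs is a
detail):

    static void flush_local_pipeline(void)
    {
        uint32_t eax = 0, ebx, ecx, edx;

        /* CPUID is serialising: it discards stale prefetched/decoded code. */
        asm volatile ( "cpuid"
                       : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) );
    }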

> +
> +    xsplice_work.data->rc = rc;
> +
> +    local_irq_enable();
> +    set_nmi_callback(saved_nmi_callback);
> +
> +    xsplice_work.do_work = 0;
> +    smp_wmb(); /* Synchronize with waiting CPUs. */
> +}
> +
> +/*
> + * The main function which manages the work of quiescing the system and
> + * patching code.
> + */
> +void do_xsplice(void)
> +{
> +    struct payload *p = xsplice_work.data;
> +    unsigned int cpu = smp_processor_id();
> +
> +    /* Fast path: no work to do. */
> +    if ( likely(!xsplice_work.do_work) )
> +        return;
> +    ASSERT(local_irq_is_enabled());
> +
> +    /* Set at -1, so will go up to num_online_cpus - 1 */
> +    if ( atomic_inc_and_test(&xsplice_work.semaphore) )
> +    {
> +        unsigned int total_cpus;
> +
> +        if ( !get_cpu_maps() )
> +        {
> +            printk(XENLOG_DEBUG "%s: CPU%u - unable to get cpu_maps lock.\n",
> +                   p->name, cpu);
> +            xsplice_work.data->rc = -EBUSY;
> +            xsplice_work.do_work = 0;
> +            return;

This error path leaves a ref in the semaphore.

> +        }
> +
> +        barrier(); /* MUST do it after get_cpu_maps. */
> +        total_cpus = num_online_cpus() - 1;
> +
> +        if ( total_cpus )
> +        {
> +            printk(XENLOG_DEBUG "%s: CPU%u - IPIing the %u CPUs.\n", p->name,
> +                   cpu, total_cpus);
> +            smp_call_function(reschedule_fn, NULL, 0);
> +        }
> +        (void)xsplice_do_single(total_cpus);
> +
> +        ASSERT(local_irq_is_enabled());
> +
> +        put_cpu_maps();
> +
> +        printk(XENLOG_DEBUG "%s finished with rc=%d\n", p->name, p->rc);
> +    }
> +    else
> +    {
> +        /* Wait for all CPUs to rendezvous. */
> +        while ( xsplice_work.do_work && !xsplice_work.ready )
> +        {
> +            cpu_relax();
> +            smp_rmb();
> +        }
> +

What happens here if the rendezvous initiator times out?  Looks like we
will spin forever waiting for do_work which will never drop back to 0.

> +        /* Disable IRQs and signal. */
> +        local_irq_disable();
> +        atomic_inc(&xsplice_work.irq_semaphore);
> +
> +        /* Wait for patching to complete. */
> +        while ( xsplice_work.do_work )
> +        {
> +            cpu_relax();
> +            smp_rmb();
> +        }
> +        local_irq_enable();

Splitting the modification of do_work and ready across multiple
functions makes it particularly hard to reason about the correctness of
the rendezvous.  It would be better to have a xsplice_rendezvous()
function whose purpose was to negotiate the rendezvous only, using local
static state.  The action can then be just the switch() from
xsplice_do_single().

> +    }
> +}
> +
> diff --git a/xen/include/asm-arm/nmi.h b/xen/include/asm-arm/nmi.h
> index a60587e..82aff35 100644
> --- a/xen/include/asm-arm/nmi.h
> +++ b/xen/include/asm-arm/nmi.h
> @@ -4,6 +4,19 @@
>  #define register_guest_nmi_callback(a)  (-ENOSYS)
>  #define unregister_guest_nmi_callback() (-ENOSYS)
>  
> +typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
> +
> +/**
> + * set_nmi_callback
> + *
> + * Set a handler for an NMI. Only one handler may be
> + * set. Return the old nmi callback handler.
> + */
> +static inline nmi_callback_t set_nmi_callback(nmi_callback_t callback)
> +{
> +    return NULL;
> +}
> +

This addition suggests that there should probably be an
arch_xsplice_prepare_rendezvous() and arch_xsplice_finish_rendezvous().
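
i.e. something like (a sketch of the split; x86 would keep the NMI masking,
and the ARM hooks stay empty for now):

    /* x86 */
    static nmi_callback_t saved_nmi_callback;

    void arch_xsplice_prepare_rendezvous(void)
    {
        saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
    }

    void arch_xsplice_finish_rendezvous(void)
    {
        set_nmi_callback(saved_nmi_callback);
    }

    /* ARM */
    void arch_xsplice_prepare_rendezvous(void) { }
    void arch_xsplice_finish_rendezvous(void) { }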

~Andrew
Ross Lagerwall Feb. 17, 2016, 8:58 a.m. UTC | #2
On 02/16/2016 07:11 PM, Andrew Cooper wrote:
snip
>> +        }
>> +
>> +        barrier(); /* MUST do it after get_cpu_maps. */
>> +        total_cpus = num_online_cpus() - 1;
>> +
>> +        if ( total_cpus )
>> +        {
>> +            printk(XENLOG_DEBUG "%s: CPU%u - IPIing the %u CPUs.\n", p->name,
>> +                   cpu, total_cpus);
>> +            smp_call_function(reschedule_fn, NULL, 0);
>> +        }
>> +        (void)xsplice_do_single(total_cpus);
>> +
>> +        ASSERT(local_irq_is_enabled());
>> +
>> +        put_cpu_maps();
>> +
>> +        printk(XENLOG_DEBUG "%s finished with rc=%d\n", p->name, p->rc);
>> +    }
>> +    else
>> +    {
>> +        /* Wait for all CPUs to rendezvous. */
>> +        while ( xsplice_work.do_work && !xsplice_work.ready )
>> +        {
>> +            cpu_relax();
>> +            smp_rmb();
>> +        }
>> +
>
> What happens here if the rendezvous initiator times out?  Looks like we
> will spin forever waiting for do_work which will never drop back to 0.

xsplice_do_wait() sets do_work to 0 on a timeout.
Jan Beulich Feb. 17, 2016, 10:50 a.m. UTC | #3
>>> On 16.02.16 at 20:11, <andrew.cooper3@citrix.com> wrote:
> On 12/02/16 18:05, Konrad Rzeszutek Wilk wrote:
>> +void xsplice_revert_jmp(struct xsplice_patch_func *func)
>> +{
>> +    memcpy((void *)func->old_addr, func->undo, PATCH_INSN_SIZE);
> 
> _p() is common shorthand in Xen for a cast to (void *)

Iirc this was meant to be used only in printk() arguments, and may
also only have been needed to abstract out some 32-/64-bit
differences. I'd certainly discourage use here.

>> +static int apply_payload(struct payload *data)
>> +{
>> +    unsigned int i;
>> +
>> +    printk(XENLOG_DEBUG "%s: Applying %u functions.\n", data->name,
>> +           data->nfuncs);
>> +
>> +    for ( i = 0; i < data->nfuncs; i++ )
>> +        xsplice_apply_jmp(data->funcs + i);
> 
> In cases such as this, it is better to use data->funcs[i], as the
> compiler can more easily perform pointer alias analysis.

Why would that be? &x[i] and x + i are identical for all purposes
afaik.

Jan
Ross Lagerwall Feb. 19, 2016, 9:30 a.m. UTC | #4
On 02/16/2016 07:11 PM, Andrew Cooper wrote:
> On 12/02/16 18:05, Konrad Rzeszutek Wilk wrote:
>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>> index 9d43f7b..b5995b9 100644
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -36,6 +36,7 @@
>>   #include <xen/cpu.h>
>>   #include <xen/wait.h>
>>   #include <xen/guest_access.h>
>> +#include <xen/xsplice.h>
>>   #include <public/sysctl.h>
>>   #include <public/hvm/hvm_vcpu.h>
>>   #include <asm/regs.h>
>> @@ -121,6 +122,7 @@ static void idle_loop(void)
>>           (*pm_idle)();
>>           do_tasklet();
>>           do_softirq();
>> +        do_xsplice(); /* Must be last. */
>
> Then name "do_xsplice()" is slightly misleading (although it is in equal
> company here).  check_for_xsplice_work() would be more accurate.
>
>> diff --git a/xen/arch/x86/xsplice.c b/xen/arch/x86/xsplice.c
>> index 814dd52..ae35e91 100644
>> --- a/xen/arch/x86/xsplice.c
>> +++ b/xen/arch/x86/xsplice.c
>> @@ -10,6 +10,25 @@
>>                               __func__,__LINE__, x); return x; }
>>   #endif
>>
>> +#define PATCH_INSN_SIZE 5
>
> Somewhere you should have a BUILD_BUG_ON() confirming that
> PATCH_INSN_SIZE fits within the undo array.
>
> Having said that, I think all of xsplice_patch_func should be
> arch-specific rather than generic.
>
>> +
>> +void xsplice_apply_jmp(struct xsplice_patch_func *func)
>> +{
>> +    uint32_t val;
>> +    uint8_t *old_ptr;
>> +
>> +    old_ptr = (uint8_t *)func->old_addr;
>> +    memcpy(func->undo, old_ptr, PATCH_INSN_SIZE);
>
> At least a newline here please.
>
>> +    *old_ptr++ = 0xe9; /* Relative jump */
>> +    val = func->new_addr - func->old_addr - PATCH_INSN_SIZE;
>
> E9 takes a rel32 parameter, which is signed.
>
> I think you need to explicitly cast to intptr_t and used signed
> arithmetic throughout this calculation to correctly calculate a
> backwards jump.

According to my testing and expectations based on the spec and GCC's 
implementation-defined behaviour, the offset is computed correctly for 
backward (and forward) jumps. I'm sure the types can be improved though...

>
> I think there should also be some sanity checks that both old_addr and
> new_addr are in the Xen 1G virtual region.
>

OK. Though these sanity checks should happen when loading the patch, not 
applying it.
Ross Lagerwall Feb. 22, 2016, 3 p.m. UTC | #5
On 02/12/2016 06:05 PM, Konrad Rzeszutek Wilk wrote:
snip
> +static void xsplice_do_single(unsigned int total_cpus)
> +{
> +    nmi_callback_t saved_nmi_callback;
> +    struct payload *data, *tmp;
> +    s_time_t timeout;
> +    int rc;
> +
> +    data = xsplice_work.data;
> +    timeout = xsplice_work.timeout + NOW();
> +    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
> +                         "Timed out on CPU semaphore") )
> +        return;
> +
> +    /* "Mask" NMIs. */
> +    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
> +
> +    /* All CPUs are waiting, now signal to disable IRQs. */
> +    xsplice_work.ready = 1;
> +    smp_wmb();
> +
> +    atomic_inc(&xsplice_work.irq_semaphore);
> +    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
> +                         "Timed out on IRQ semaphore.") )
> +        return;
> +
> +    local_irq_disable();
> +    /* Now this function should be the only one on any stack.
> +     * No need to lock the payload list or applied list. */
> +    switch ( xsplice_work.cmd )
> +    {
> +    case XSPLICE_ACTION_APPLY:
> +        rc = apply_payload(data);
> +        if ( rc == 0 )
> +            data->state = XSPLICE_STATE_APPLIED;
> +        break;
> +    case XSPLICE_ACTION_REVERT:
> +        rc = revert_payload(data);
> +        if ( rc == 0 )
> +            data->state = XSPLICE_STATE_CHECKED;
> +        break;
> +    case XSPLICE_ACTION_REPLACE:
> +        list_for_each_entry_safe_reverse ( data, tmp, &applied_list, list )
> +        {
> +            data->rc = revert_payload(data);
> +            if ( data->rc == 0 )
> +                data->state = XSPLICE_STATE_CHECKED;
> +            else
> +            {
> +                rc = -EINVAL;
> +                break;
> +            }
> +        }

You're using data as a loop iterator here but the variable serves 
another purpose outside the loop. That's not gonna end well.
Ross Lagerwall Feb. 22, 2016, 5:06 p.m. UTC | #6
On 02/22/2016 03:00 PM, Ross Lagerwall wrote:
> On 02/12/2016 06:05 PM, Konrad Rzeszutek Wilk wrote:
> snip
>> +static void xsplice_do_single(unsigned int total_cpus)
>> +{
>> +    nmi_callback_t saved_nmi_callback;
>> +    struct payload *data, *tmp;
>> +    s_time_t timeout;
>> +    int rc;
>> +
>> +    data = xsplice_work.data;
>> +    timeout = xsplice_work.timeout + NOW();
>> +    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
>> +                         "Timed out on CPU semaphore") )
>> +        return;
>> +
>> +    /* "Mask" NMIs. */
>> +    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
>> +
>> +    /* All CPUs are waiting, now signal to disable IRQs. */
>> +    xsplice_work.ready = 1;
>> +    smp_wmb();
>> +
>> +    atomic_inc(&xsplice_work.irq_semaphore);
>> +    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout,
>> total_cpus,
>> +                         "Timed out on IRQ semaphore.") )
>> +        return;
>> +
>> +    local_irq_disable();
>> +    /* Now this function should be the only one on any stack.
>> +     * No need to lock the payload list or applied list. */
>> +    switch ( xsplice_work.cmd )
>> +    {
>> +    case XSPLICE_ACTION_APPLY:
>> +        rc = apply_payload(data);
>> +        if ( rc == 0 )
>> +            data->state = XSPLICE_STATE_APPLIED;
>> +        break;
>> +    case XSPLICE_ACTION_REVERT:
>> +        rc = revert_payload(data);
>> +        if ( rc == 0 )
>> +            data->state = XSPLICE_STATE_CHECKED;
>> +        break;
>> +    case XSPLICE_ACTION_REPLACE:
>> +        list_for_each_entry_safe_reverse ( data, tmp, &applied_list,
>> list )
>> +        {
>> +            data->rc = revert_payload(data);
>> +            if ( data->rc == 0 )
>> +                data->state = XSPLICE_STATE_CHECKED;
>> +            else
>> +            {
>> +                rc = -EINVAL;
>> +                break;
>> +            }
>> +        }
>
> You're using data as a loop iterator here but the variable serves
> another purpose outside the loop. That's not gonna end well.
>

Also above, you've got:
list_for_each_entry_safe_reverse ( data, tmp, &applied_list, list )

but it needs to be:
list_for_each_entry_safe_reverse ( data, tmp, &applied_list, applied_list )

I'm not sure why this was changed from how I had it...

rc is also used uninitialized in the replace path.
Konrad Rzeszutek Wilk Feb. 23, 2016, 8:41 p.m. UTC | #7
. snip..
> > + * Note that because of this NOP code the do_nmi is not safely patchable.
> > + * Also if we do receive 'real' NMIs we have lost them.
> 
> The MCE path needs consideration as well.  Unlike the NMI path however,
> that one cannot be ignored.
> 
> In both cases, it might be best to see about raising a tasklet or
> softirq to pick up some deferred work.

I will put that in a separate patch as this patch is big enough.

> 
> > + */
> > +static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
> > +{
> > +    return 1;
> > +}
> > +
> > +static void reschedule_fn(void *unused)
> > +{
> > +    smp_mb(); /* Synchronize with setting do_work */
> > +    raise_softirq(SCHEDULE_SOFTIRQ);
> 
> As you have to IPI each processor to raise a schedule softirq, you can
> set a per-cpu "xsplice enter rendezvous" variable.  This prevents the
> need for the return-to-guest path to poll one single byte.

.. Not sure I follow. The IPI we send to the other CPU is 0xfb - which
makes the smp_call_function_interrupt run, which calls this function:
reschedule_fn(). Then raise_softirq sets the bit on softirq_pending.

Great. Since we caused an IPI that means we ended up calling VMEXIT which
eventually ends calling process_pending_softirqs() which calls schedule().
And after that it calls check_for_xsplice_work().

Are you suggesting to add new softirq that would call in check_for_xsplice_work()?

Or are you suggesting to skip the softirq_pending check and all the
code around that and instead have each VMEXIT code path check this
per-cpu "xsplice enter" variable? If so, why not use the existing
softirq infrastructure? 

.. snip..
> 
> > +}
> > +
> > +void do_xsplice(void)
> > +{
> > +    struct payload *p = xsplice_work.data;
> > +    unsigned int cpu = smp_processor_id();
> > +
> > +    /* Fast path: no work to do. */
> > +    if ( likely(!xsplice_work.do_work) )
> > +        return;
> > +    ASSERT(local_irq_is_enabled());
> > +
> > +    /* Set at -1, so will go up to num_online_cpus - 1 */
> > +    if ( atomic_inc_and_test(&xsplice_work.semaphore) )
> > +    {
> > +        unsigned int total_cpus;
> > +
> > +        if ( !get_cpu_maps() )
> > +        {
> > +            printk(XENLOG_DEBUG "%s: CPU%u - unable to get cpu_maps lock.\n",
> > +                   p->name, cpu);
> > +            xsplice_work.data->rc = -EBUSY;
> > +            xsplice_work.do_work = 0;
> > +            return;
> 
> This error path leaves a ref in the semaphore.

It does. And it also does so in xsplice_do_single() - if the xsplice_do_wait()
fails, 
> 
> > +        }
> > +
> > +        barrier(); /* MUST do it after get_cpu_maps. */
> > +        total_cpus = num_online_cpus() - 1;
> > +
> > +        if ( total_cpus )
> > +        {
> > +            printk(XENLOG_DEBUG "%s: CPU%u - IPIing the %u CPUs.\n", p->name,
> > +                   cpu, total_cpus);
> > +            smp_call_function(reschedule_fn, NULL, 0);
> > +        }
> > +        (void)xsplice_do_single(total_cpus);

.. here, we never decrement the semaphore.

Which is a safe-guard (documenting that).

The issue here is that say we have two CPUs:

CPU0				CPU1

semaphore=0			semaphore=1
 !get_cpu_maps()
  do_work = 0;			.. now goes in the 'slave' part below and exits out
                                as do_work=0

Now if we decremented the semaphore back on the error path:

CPU0				CPU1

semaphore=0			
 !get_cpu_maps()
				.. do_work is still set.
  do_work = 0;			
                   
  semaphore=-1
				atomic_inc_and_test(semaphore) == 0
				.. now it assumes the role of a master.

				.. it will fail as the other CPU will never
                                renezvous (the do_work is set to zero).
				But we waste another 30ms spinning.


The end result is that after patching the semaphore should equal
num_online_cpus-1.


> > +
> > +        ASSERT(local_irq_is_enabled());
> > +
> > +        put_cpu_maps();
> > +
> > +        printk(XENLOG_DEBUG "%s finished with rc=%d\n", p->name, p->rc);
> > +    }
> > +    else
> > +    {
> > +        /* Wait for all CPUs to rendezvous. */
> > +        while ( xsplice_work.do_work && !xsplice_work.ready )
> > +        {
> > +            cpu_relax();
> > +            smp_rmb();
> > +        }
> > +
> 
> What happens here if the rendezvous initiator times out?  Looks like we
> will spin forever waiting for do_work which will never drop back to 0.

Ross answered that, but the other code (master) will set do_work to zero so
we will exit this.

> 
> > +        /* Disable IRQs and signal. */
> > +        local_irq_disable();
> > +        atomic_inc(&xsplice_work.irq_semaphore);
> > +
> > +        /* Wait for patching to complete. */
> > +        while ( xsplice_work.do_work )

Ditto for this.
> > +        {
> > +            cpu_relax();
> > +            smp_rmb();
> > +        }
> > +        local_irq_enable();
> 
> Splitting the modification of do_work and ready across multiple
> functions makes it particularly hard to reason about the correctness of
> the rendezvous.  It would be better to have a xsplice_rendezvous()
> function whose purpose was to negotiate the rendezvous only, using local
> static state.  The action can then be just the switch() from
> xsplice_do_single().

The earlier code was like that but it ended up being quite
big. Let me make it happen and leave the actions in the xsplice_do_single()
(and rename it to xsplice_do_action()).


> 
> > +    }
> > +}
> > +
> > diff --git a/xen/include/asm-arm/nmi.h b/xen/include/asm-arm/nmi.h
> > index a60587e..82aff35 100644
> > --- a/xen/include/asm-arm/nmi.h
> > +++ b/xen/include/asm-arm/nmi.h
> > @@ -4,6 +4,19 @@
> >  #define register_guest_nmi_callback(a)  (-ENOSYS)
> >  #define unregister_guest_nmi_callback() (-ENOSYS)
> >  
> > +typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
> > +
> > +/**
> > + * set_nmi_callback
> > + *
> > + * Set a handler for an NMI. Only one handler may be
> > + * set. Return the old nmi callback handler.
> > + */
> > +static inline nmi_callback_t set_nmi_callback(nmi_callback_t callback)
> > +{
> > +    return NULL;
> > +}
> > +
> 
> This addition suggests that there should probably be an
> arch_xsplice_prepare_rendezvous() and arch_xsplice_finish_rendezvous().

Yes indeed.
> 
> ~Andrew
Konrad Rzeszutek Wilk Feb. 23, 2016, 8:43 p.m. UTC | #8
On Mon, Feb 22, 2016 at 03:00:31PM +0000, Ross Lagerwall wrote:
> On 02/12/2016 06:05 PM, Konrad Rzeszutek Wilk wrote:
> snip
> >+static void xsplice_do_single(unsigned int total_cpus)
> >+{
> >+    nmi_callback_t saved_nmi_callback;
> >+    struct payload *data, *tmp;
> >+    s_time_t timeout;
> >+    int rc;
> >+
> >+    data = xsplice_work.data;
> >+    timeout = xsplice_work.timeout + NOW();
> >+    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
> >+                         "Timed out on CPU semaphore") )
> >+        return;
> >+
> >+    /* "Mask" NMIs. */
> >+    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
> >+
> >+    /* All CPUs are waiting, now signal to disable IRQs. */
> >+    xsplice_work.ready = 1;
> >+    smp_wmb();
> >+
> >+    atomic_inc(&xsplice_work.irq_semaphore);
> >+    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
> >+                         "Timed out on IRQ semaphore.") )
> >+        return;
> >+
> >+    local_irq_disable();
> >+    /* Now this function should be the only one on any stack.
> >+     * No need to lock the payload list or applied list. */
> >+    switch ( xsplice_work.cmd )
> >+    {
> >+    case XSPLICE_ACTION_APPLY:
> >+        rc = apply_payload(data);
> >+        if ( rc == 0 )
> >+            data->state = XSPLICE_STATE_APPLIED;
> >+        break;
> >+    case XSPLICE_ACTION_REVERT:
> >+        rc = revert_payload(data);
> >+        if ( rc == 0 )
> >+            data->state = XSPLICE_STATE_CHECKED;
> >+        break;
> >+    case XSPLICE_ACTION_REPLACE:
> >+        list_for_each_entry_safe_reverse ( data, tmp, &applied_list, list )
> >+        {
> >+            data->rc = revert_payload(data);
> >+            if ( data->rc == 0 )
> >+                data->state = XSPLICE_STATE_CHECKED;
> >+            else
> >+            {
> >+                rc = -EINVAL;
> >+                break;
> >+            }
> >+        }
> 
> You're using data as a loop iterator here but the variable serves another
> purpose outside the loop. That's not gonna end well.

No not at all. I've added another variable: "other" that will be used in the loop.

> 
> -- 
> Ross Lagerwall
Konrad Rzeszutek Wilk Feb. 23, 2016, 8:47 p.m. UTC | #9
> 
> Also above, you've got:
> list_for_each_entry_safe_reverse ( data, tmp, &applied_list, list )
> 
> but it needs to be:
> list_for_each_entry_safe_reverse ( data, tmp, &applied_list, applied_list )

Totally missed it.
> 
> I'm not sure why this was changed from how I had it...

I had issues applying the patch so I modified it by hand - which
was of course the wrong thing to do. Let me also document
that we MUST use the 'applied_list' list.
> 
> rc is also used uninitialized in the replace path.
> 
> -- 
> Ross Lagerwall
Konrad Rzeszutek Wilk Feb. 23, 2016, 8:53 p.m. UTC | #10
On Tue, Feb 23, 2016 at 03:41:57PM -0500, Konrad Rzeszutek Wilk wrote:
> .. snip..
> > > + * Note that because of this NOP code the do_nmi is not safely patchable.
> > > + * Also if we do receive 'real' NMIs we have lost them.
> > 
> > The MCE path needs consideration as well.  Unlike the NMI path however,
> > that one cannot be ignored.
> > 
> > In both cases, it might be best to see about raising a tasklet or
> > softirq to pick up some deferred work.
> 
> I will put that in a separate patch as this patch is big enough.
> 

.. which will also fix the alternative_asm() usage of it.
Konrad Rzeszutek Wilk Feb. 23, 2016, 8:57 p.m. UTC | #11
> > > +static void reschedule_fn(void *unused)
> > > +{
> > > +    smp_mb(); /* Synchronize with setting do_work */
> > > +    raise_softirq(SCHEDULE_SOFTIRQ);
> > 
> > As you have to IPI each processor to raise a schedule softirq, you can
> > set a per-cpu "xsplice enter rendezvous" variable.  This prevents the
> > need for the return-to-guest path to poll one single byte.
> 
> .. Not sure I follow. The IPI we send to the other CPU is 0xfb - which
> makes the smp_call_function_interrupt run, which calls this function:
> reschedule_fn(). Then raise_softirq sets the bit on softirq_pending.
> 
> Great. Since we caused an IPI that means we ended up calling VMEXIT which
> eventually ends calling process_pending_softirqs() which calls schedule().
> And after that it calls check_for_xsplice_work().
> 
> Are you suggesting to add new softirq that would call in check_for_xsplice_work()?
> 
> Or are you suggesting to skip the softirq_pending check and all the
> code around that and instead have each VMEXIT code path check this
> per-cpu "xsplice enter" variable? If so, why not use the existing
> softirq infrastructure? 

N/m.

You were referring to the:

> > > +void do_xsplice(void)
..
> > > +    /* Fast path: no work to do. */
> > > +    if ( likely(!xsplice_work.do_work) )
> > > +        return;

which every CPU is going to do when it calls idle_loop, svm_do_resume,
and vmx_do_resume.

Let me add that in!
Andrew Cooper Feb. 23, 2016, 9:10 p.m. UTC | #12
On 23/02/2016 20:41, Konrad Rzeszutek Wilk wrote:
> . snip..
>>> + * Note that because of this NOP code the do_nmi is not safely patchable.
>>> + * Also if we do receive 'real' NMIs we have lost them.
>> The MCE path needs consideration as well.  Unlike the NMI path however,
>> that one cannot be ignored.
>>
>> In both cases, it might be best to see about raising a tasklet or
>> softirq to pick up some deferred work.
> I will put that in a separate patch as this patch is big enough.

Actually, after subsequent thought, raising a tasklet wont help.

The biggest risk is the SMAP alternative in the asm entrypoints.  The
only way patching that can be made safe is to play fun and games with
debug traps. i.e.

Patch a 0xcc first
Then patch the rest of the bytes in the replacement
Then replace the 0xcc with the first byte of the replacement

This way if the codepath is hit while patching is in progress, you will
end up in the debug trap handler rather than executing junk.  There then
has to be some scheduling games for the NMI/MCE handler to take over
patching the code if it interrupted the patching pcpu.  Patching in
principle is a short operation, so performing it in the handlers is not too
much of a problem.
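
In code, the sequence would be roughly (an untested sketch; the name is made
up, and the fencing/serialising needed between the steps is glossed over):

    static void text_poke_with_bp(uint8_t *old_ptr, const uint8_t *insn,
                                  unsigned int len)
    {
        old_ptr[0] = 0xcc;                      /* 1. breakpoint first */
        smp_wmb();
        memcpy(old_ptr + 1, insn + 1, len - 1); /* 2. tail of the replacement */
        smp_wmb();
        old_ptr[0] = insn[0];                   /* 3. finally the first byte */
    }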

The tricky part is patching the top of the debug trap handler and not
ending in an infinite loop.  I have a cunning idea, and will see if I
can find some copious free time to experiment with.

For v1 however, the implementation is fine.  It can be documented that
patching functions on the NMI/MCE path is liable to end in sadness, and
the asm entry points will have been taken care of during boot.

>
>>> + */
>>> +static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
>>> +{
>>> +    return 1;
>>> +}
>>> +
>>> +static void reschedule_fn(void *unused)
>>> +{
>>> +    smp_mb(); /* Synchronize with setting do_work */
>>> +    raise_softirq(SCHEDULE_SOFTIRQ);
>> As you have to IPI each processor to raise a schedule softirq, you can
>> set a per-cpu "xsplice enter rendezvous" variable.  This prevents the
>> need for the return-to-guest path to poll one single byte.
> .. Not sure I follow. The IPI we send to the other CPU is 0xfb - which
> makes the smp_call_function_interrupt run, which calls this function:
> reschedule_fn(). Then raise_softirq sets the bit on softirq_pending.

Correct

>
> Great. Since we caused an IPI that means we ended up calling VMEXIT which
> eventually ends calling process_pending_softirqs() which calls schedule().
> And after that it calls check_for_xsplice_work().

Correct

> Are you suggesting to add new softirq that would call in check_for_xsplice_work()?

No.  I am concerned that check_for_xsplice_work() is reading a single
global variable which, when patching is occurring, will be in a
repeatedly-dirtied cacheline.

> Or are you suggesting to skip the softirq_pending check and all the
> code around that and instead have each VMEXIT code path check this
> per-cpu "xsplice enter" variable? If so, why not use the existing
> softirq infrastructure? 

What I am suggesting is having reschedule_fn() set
this_cpu(xsplice_work_pending) = 1 and have check_for_xsplice_work()
check this_cpu(xsplice_work_pending) rather than the global semaphore.

This should ease the impact on the cache coherency fabric.

>
> .. snip..
>>> +}
>>> +
>>> +void do_xsplice(void)
>>> +{
>>> +    struct payload *p = xsplice_work.data;
>>> +    unsigned int cpu = smp_processor_id();
>>> +
>>> +    /* Fast path: no work to do. */
>>> +    if ( likely(!xsplice_work.do_work) )
>>> +        return;
>>> +    ASSERT(local_irq_is_enabled());
>>> +
>>> +    /* Set at -1, so will go up to num_online_cpus - 1 */
>>> +    if ( atomic_inc_and_test(&xsplice_work.semaphore) )
>>> +    {
>>> +        unsigned int total_cpus;
>>> +
>>> +        if ( !get_cpu_maps() )
>>> +        {
>>> +            printk(XENLOG_DEBUG "%s: CPU%u - unable to get cpu_maps lock.\n",
>>> +                   p->name, cpu);
>>> +            xsplice_work.data->rc = -EBUSY;
>>> +            xsplice_work.do_work = 0;
>>> +            return;
>> This error path leaves a ref in the semaphore.
> It does. And it also does so in xsplice_do_single() - if the xsplice_do_wait()
> fails, 
>>> +        }
>>> +
>>> +        barrier(); /* MUST do it after get_cpu_maps. */
>>> +        total_cpus = num_online_cpus() - 1;
>>> +
>>> +        if ( total_cpus )
>>> +        {
>>> +            printk(XENLOG_DEBUG "%s: CPU%u - IPIing the %u CPUs.\n", p->name,
>>> +                   cpu, total_cpus);
>>> +            smp_call_function(reschedule_fn, NULL, 0);
>>> +        }
>>> +        (void)xsplice_do_single(total_cpus);
> .. here, we never decrement the semaphore.
>
> Which is a safe-guard (documenting that).
>
> The issue here is that say we have two CPUs:
>
> CPU0				CPU1
>
> semaphore=0			semaphore=1
>  !get_cpu_maps()
>   do_work = 0;			.. now goes in the 'slave' part below and exits out
>                                 as do_work=0
>
> Now if we decremented the semaphore back on the error path:
>
> CPU0				CPU1
>
> semaphore=0			
>  !get_cpu_maps()
> 				.. do_work is still set.
>   do_work = 0;			
>                    
>   semaphore=-1
> 				atomic_inc_and_test(semaphore) == 0
> 				.. now it assumes the role of a master.
>
> 				.. it will fail as the other CPU will never
>                                 renezvous (the do_work is set to zero).
> 				But we waste another 30ms spinning.
>
>
> The end result is that after patching the semaphore should equal
> num_online_cpus-1.

Yay concurrency!  I am going to have to consider this more closely.

~Andrew
Jan Beulich Feb. 24, 2016, 9:31 a.m. UTC | #13
>>> On 23.02.16 at 22:10, <andrew.cooper3@citrix.com> wrote:
> On 23/02/2016 20:41, Konrad Rzeszutek Wilk wrote:
>> . snip..
>>>> + * Note that because of this NOP code the do_nmi is not safely patchable.
>>>> + * Also if we do receive 'real' NMIs we have lost them.
>>> The MCE path needs consideration as well.  Unlike the NMI path however,
>>> that one cannot be ignored.
>>>
>>> In both cases, it might be best to see about raising a tasklet or
>>> softirq to pick up some deferred work.
>> I will put that in a separate patch as this patch is big enough.
> 
> Actually, after subsequent thought, raising a tasklet wont help.
> 
> The biggest risk is the SMAP alternative in the asm entrypoints.  The
> only way patching that can be made safe is to play fun and games with
> debug traps. i.e.
> 
> Patch a 0xcc first
> Then patch the rest of the bytes in the replacement
> Then replace the 0xcc with the first byte of the replacement
> 
> This way if the codepath is hit while patching is in progress, you will
> end up in the debug trap handler rather than executing junk.  There then
> has to be some scheduling games for the NMI/MCE handler to take over
> patching the code if it interrupted the patching pcpu.  Patching in
> principle is a short operation, so performing it the handlers is not too
> much of a problem.
> 
> The tricky part is patching the top of the debug trap handler and not
> ending in an infinite loop.  I have a cunning idea, and will see if I
> can find some copious free time to experiment with.

For the SMAP patching this isn't the only way to make it safe, but
the alternative isn't suitable for xSplice: As long as the original
code is just NOPs, and as long as the starting address is aligned,
the first two bytes could become a short branch instead, patching
would fiddle with everything except the first two bytes first, and
as its last action (suitably fenced and maybe even cache flushed)
would atomically overwrite the first two bytes.

Jan

Patch

diff --git a/xen/arch/arm/xsplice.c b/xen/arch/arm/xsplice.c
index 8d85fa9..06f6875 100644
--- a/xen/arch/arm/xsplice.c
+++ b/xen/arch/arm/xsplice.c
@@ -3,7 +3,15 @@ 
 #include <xen/xsplice_elf.h>
 #include <xen/xsplice.h>
 
-int xsplice_verify_elf(uint8_t *data, ssize_t len)
+void xsplice_apply_jmp(struct xsplice_patch_func *func)
+{
+}
+
+void xsplice_revert_jmp(struct xsplice_patch_func *func)
+{
+}
+
+int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data)
 {
     return -ENOSYS;
 }
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9d43f7b..b5995b9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -36,6 +36,7 @@ 
 #include <xen/cpu.h>
 #include <xen/wait.h>
 #include <xen/guest_access.h>
+#include <xen/xsplice.h>
 #include <public/sysctl.h>
 #include <public/hvm/hvm_vcpu.h>
 #include <asm/regs.h>
@@ -121,6 +122,7 @@  static void idle_loop(void)
         (*pm_idle)();
         do_tasklet();
         do_softirq();
+        do_xsplice(); /* Must be last. */
     }
 }
 
@@ -137,6 +139,7 @@  void startup_cpu_idle_loop(void)
 
 static void noreturn continue_idle_domain(struct vcpu *v)
 {
+    do_xsplice();
     reset_stack_and_jump(idle_loop);
 }
 
@@ -144,6 +147,7 @@  static void noreturn continue_nonidle_domain(struct vcpu *v)
 {
     check_wakeup_from_wait();
     mark_regs_dirty(guest_cpu_user_regs());
+    do_xsplice();
     reset_stack_and_jump(ret_from_intr);
 }
 
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index e62dfa1..340f23b 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -26,6 +26,7 @@ 
 #include <xen/hypercall.h>
 #include <xen/domain_page.h>
 #include <xen/xenoprof.h>
+#include <xen/xsplice.h>
 #include <asm/current.h>
 #include <asm/io.h>
 #include <asm/paging.h>
@@ -1108,6 +1109,7 @@  static void noreturn svm_do_resume(struct vcpu *v)
 
     hvm_do_resume(v);
 
+    do_xsplice();
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 5bc3c74..1008163 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -25,6 +25,7 @@ 
 #include <xen/kernel.h>
 #include <xen/keyhandler.h>
 #include <xen/vm_event.h>
+#include <xen/xsplice.h>
 #include <asm/current.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -1716,6 +1717,7 @@  void vmx_do_resume(struct vcpu *v)
     }
 
     hvm_do_resume(v);
+    do_xsplice();
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
diff --git a/xen/arch/x86/xsplice.c b/xen/arch/x86/xsplice.c
index 814dd52..ae35e91 100644
--- a/xen/arch/x86/xsplice.c
+++ b/xen/arch/x86/xsplice.c
@@ -10,6 +10,25 @@ 
                             __func__,__LINE__, x); return x; }
 #endif
 
+#define PATCH_INSN_SIZE 5
+
+void xsplice_apply_jmp(struct xsplice_patch_func *func)
+{
+    uint32_t val;
+    uint8_t *old_ptr;
+
+    old_ptr = (uint8_t *)func->old_addr;
+    memcpy(func->undo, old_ptr, PATCH_INSN_SIZE);
+    *old_ptr++ = 0xe9; /* Relative jump */
+    val = func->new_addr - func->old_addr - PATCH_INSN_SIZE;
+    memcpy(old_ptr, &val, sizeof val);
+}
+
+void xsplice_revert_jmp(struct xsplice_patch_func *func)
+{
+    memcpy((void *)func->old_addr, func->undo, PATCH_INSN_SIZE);
+}
+
 int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data)
 {
 
diff --git a/xen/common/xsplice.c b/xen/common/xsplice.c
index fbd6129..b854c0a 100644
--- a/xen/common/xsplice.c
+++ b/xen/common/xsplice.c
@@ -3,6 +3,7 @@ 
  *
  */
 
+#include <xen/cpu.h>
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
 #include <xen/lib.h>
@@ -10,16 +11,25 @@ 
 #include <xen/mm.h>
 #include <xen/sched.h>
 #include <xen/smp.h>
+#include <xen/softirq.h>
 #include <xen/spinlock.h>
+#include <xen/wait.h>
 #include <xen/xsplice_elf.h>
 #include <xen/xsplice.h>
 
 #include <asm/event.h>
+#include <asm/nmi.h>
 #include <public/sysctl.h>
 
+/*
+ * Protects against payload_list operations and also allows only one
+ * caller in schedule_work.
+ */
 static DEFINE_SPINLOCK(payload_lock);
 static LIST_HEAD(payload_list);
 
+static LIST_HEAD(applied_list);
+
 static unsigned int payload_cnt;
 static unsigned int payload_version = 1;
 
@@ -29,6 +39,9 @@  struct payload {
     struct list_head list;               /* Linked to 'payload_list'. */
     void *payload_address;               /* Virtual address mapped. */
     size_t payload_pages;                /* Nr of the pages. */
+    struct list_head applied_list;       /* Linked to 'applied_list'. */
+    struct xsplice_patch_func *funcs;    /* The array of functions to patch. */
+    unsigned int nfuncs;                 /* Nr of functions to patch. */
 
     char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
 };
@@ -36,6 +49,24 @@  struct payload {
 static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len);
 static void free_payload_data(struct payload *payload);
 
+/* Defines an outstanding patching action. */
+struct xsplice_work
+{
+    atomic_t semaphore;          /* Used for rendezvous. First to grab it will
+                                    do the patching. */
+    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
+    uint32_t timeout;                    /* Timeout to do the operation. */
+    struct payload *data;        /* The payload on which to act. */
+    volatile bool_t do_work;     /* Signals work to do. */
+    volatile bool_t ready;       /* Signals all CPUs synchronized. */
+    uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
+};
+
+/* There can be only one outstanding patching action. */
+static struct xsplice_work xsplice_work;
+
+static int schedule_work(struct payload *data, uint32_t cmd, uint32_t timeout);
+
 static const char *state2str(int32_t state)
 {
 #define STATE(x) [XSPLICE_STATE_##x] = #x
@@ -61,14 +92,23 @@  static const char *state2str(int32_t state)
 static void xsplice_printall(unsigned char key)
 {
     struct payload *data;
+    unsigned int i;
 
     spin_lock(&payload_lock);
 
     list_for_each_entry ( data, &payload_list, list )
-        printk(" name=%s state=%s(%d) %p using %zu pages.\n", data->name,
+    {
+        printk(" name=%s state=%s(%d) %p using %zu pages:\n", data->name,
                state2str(data->state), data->state, data->payload_address,
                data->payload_pages);
 
+        for ( i = 0; i < data->nfuncs; i++ )
+        {
+            struct xsplice_patch_func *f = &(data->funcs[i]);
+            printk("    %s patch 0x%"PRIx64"(%u) with 0x%"PRIx64"(%u)\n",
+                   f->name, f->old_addr, f->old_size, f->new_addr, f->new_size);
+        }
+    }
     spin_unlock(&payload_lock);
 }
 
@@ -327,28 +367,22 @@  static int xsplice_action(xen_sysctl_xsplice_action_t *action)
     case XSPLICE_ACTION_REVERT:
         if ( data->state == XSPLICE_STATE_APPLIED )
         {
-            /* No implementation yet. */
-            data->state = XSPLICE_STATE_CHECKED;
-            data->rc = 0;
-            rc = 0;
+            data->rc = -EAGAIN;
+            rc = schedule_work(data, action->cmd, action->timeout);
         }
         break;
     case XSPLICE_ACTION_APPLY:
         if ( (data->state == XSPLICE_STATE_CHECKED) )
         {
-            /* No implementation yet. */
-            data->state = XSPLICE_STATE_APPLIED;
-            data->rc = 0;
-            rc = 0;
+            data->rc = -EAGAIN;
+            rc = schedule_work(data, action->cmd, action->timeout);
         }
         break;
     case XSPLICE_ACTION_REPLACE:
         if ( data->state == XSPLICE_STATE_CHECKED )
         {
-            /* No implementation yet. */
-            data->state = XSPLICE_STATE_CHECKED;
-            data->rc = 0;
-            rc = 0;
+            data->rc = -EAGAIN;
+            rc = schedule_work(data, action->cmd, action->timeout);
         }
         break;
     default:
@@ -576,6 +610,62 @@  static int move_payload(struct payload *payload, struct xsplice_elf *elf)
     return 0;
 }
 
+static int check_special_sections(struct payload *payload,
+                                  struct xsplice_elf *elf)
+{
+    unsigned int i;
+    static const char *const names[] = { ".xsplice.funcs" };
+
+    for ( i = 0; i < ARRAY_SIZE(names); i++ )
+    {
+        struct xsplice_elf_sec *sec;
+
+        sec = xsplice_elf_sec_by_name(elf, names[i]);
+        if ( !sec )
+        {
+            printk(XENLOG_ERR "%s: %s is missing!\n", names[i],elf->name);
+            return -EINVAL;
+        }
+        if ( !sec->sec->sh_size )
+            return -EINVAL;
+    }
+    return 0;
+}
+
+static int find_special_sections(struct payload *payload,
+                                 struct xsplice_elf *elf)
+{
+    struct xsplice_elf_sec *sec;
+    unsigned int i;
+    struct xsplice_patch_func *f;
+
+    sec = xsplice_elf_sec_by_name(elf, ".xsplice.funcs");
+    if ( sec->sec->sh_size % sizeof *payload->funcs )
+        return -EINVAL;
+
+    payload->funcs = (struct xsplice_patch_func *)sec->load_addr;
+    payload->nfuncs = sec->sec->sh_size / (sizeof *payload->funcs);
+
+    for ( i = 0; i < payload->nfuncs; i++ )
+    {
+        unsigned int j;
+
+        f = &(payload->funcs[i]);
+
+        if ( !f->new_addr || !f->old_addr || !f->old_size || !f->new_size )
+            return -EINVAL;
+
+        for ( j = 0; j < 8; j ++ )
+            if ( f->undo[j] )
+                return -EINVAL;
+
+        for ( j = 0; j < 24; j ++ )
+            if ( f->pad[j] )
+                return -EINVAL;
+    }
+    return 0;
+}
+
 static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
 {
     struct xsplice_elf elf;
@@ -605,7 +695,14 @@  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
     if ( rc )
         goto err_payload;
 
-    /* Free our temporary data structure. */
+    rc = check_special_sections(payload, &elf);
+    if ( rc )
+        goto err_payload;
+
+    rc = find_special_sections(payload, &elf);
+    if ( rc )
+        goto err_payload;
+
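+    /* Free our temporary data structure. */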
     xsplice_elf_free(&elf);
     return 0;
 
@@ -617,6 +714,253 @@  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
     return rc;
 }
 
+
+/*
+ * The following functions get the CPUs into an appropriate state and
+ * apply (or revert) each of the module's functions.
+ */
+
+/*
+ * This function is executed while all other CPUs are parked with no stack in
+ * use (they may be sitting in cpu_idle) and with IRQs disabled.  We guard
+ * against NMIs by temporarily installing a NOP NMI handler.
+ */
+static int apply_payload(struct payload *data)
+{
+    unsigned int i;
+
+    printk(XENLOG_DEBUG "%s: Applying %u functions.\n", data->name,
+           data->nfuncs);
+
+    for ( i = 0; i < data->nfuncs; i++ )
+        xsplice_apply_jmp(data->funcs + i);
+
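+    /* Track applied payloads so a later REPLACE can revert them. */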
+    list_add_tail(&data->applied_list, &applied_list);
+
+    return 0;
+}
+
+/*
+ * This function is executed while all other CPUs are parked with no stack in
+ * use (they may be sitting in cpu_idle) and with IRQs disabled.
+ */
+static int revert_payload(struct payload *data)
+{
+    unsigned int i;
+
+    printk(XENLOG_DEBUG "%s: Reverting.\n", data->name);
+
+    for ( i = 0; i < data->nfuncs; i++ )
+        xsplice_revert_jmp(data->funcs + i);
+
+    list_del(&data->applied_list);
+
+    return 0;
+}
+
+/* Must be holding the payload_lock. */
+static int schedule_work(struct payload *data, uint32_t cmd, uint32_t timeout)
+{
+    /* Fail if an operation is already scheduled. */
+    if ( xsplice_work.do_work )
+        return -EBUSY;
+
+    xsplice_work.cmd = cmd;
+    xsplice_work.data = data;
+    xsplice_work.timeout = timeout ? timeout : MILLISECS(30);
+
+    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
+           xsplice_work.timeout / MILLISECS(1));
+
+    atomic_set(&xsplice_work.semaphore, -1);
+    atomic_set(&xsplice_work.irq_semaphore, -1);
+
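+    /*
+     * Publish the work item: 'ready' must be observed clear before
+     * 'do_work' is observed set, hence the write barriers below.
+     */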
+    xsplice_work.ready = 0;
+    smp_wmb();
+    xsplice_work.do_work = 1;
+    smp_wmb();
+
+    return 0;
+}
+
+/*
+ * Note that while this NOP callback is installed, do_nmi itself cannot be
+ * patched safely.  Also, any 'real' NMIs received during this window are lost.
+ */
+static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
+{
+    return 1;
+}
+
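+/*
+ * Kick every CPU through the scheduler so that each re-enters do_xsplice()
+ * from its idle loop or return-to-guest path, i.e. with no stack in use.
+ */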
+static void reschedule_fn(void *unused)
+{
+    smp_mb(); /* Synchronize with setting do_work */
+    raise_softirq(SCHEDULE_SOFTIRQ);
+}
+
+static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
+                           unsigned int total_cpus, const char *s)
+{
+    int rc = 0;
+
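+    /* Busy-wait until enough CPUs have checked in, or the timeout expires. */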
+    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
+        cpu_relax();
+
+    /* Log & abort. */
+    if ( atomic_read(counter) != total_cpus )
+    {
+        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
+               s, atomic_read(counter), total_cpus);
+        rc = -EBUSY;
+        xsplice_work.data->rc = rc;
+        xsplice_work.do_work = 0;
+        smp_wmb();
+        return rc;
+    }
+    return rc;
+}
+
+static void xsplice_do_single(unsigned int total_cpus)
+{
+    nmi_callback_t saved_nmi_callback;
+    struct payload *data, *tmp;
+    s_time_t timeout;
+    int rc = 0;
+
+    data = xsplice_work.data;
+    timeout = xsplice_work.timeout + NOW();
+    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
+                         "Timed out on CPU semaphore") )
+        return;
+
+    /* "Mask" NMIs. */
+    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
+
+    /* All CPUs are waiting, now signal to disable IRQs. */
+    xsplice_work.ready = 1;
+    smp_wmb();
+
+    atomic_inc(&xsplice_work.irq_semaphore);
+    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
+                         "Timed out on IRQ semaphore") )
+        return;
+
+    local_irq_disable();
+    /*
+     * Now this function should be the only one on any stack.
+     * No need to lock the payload list or applied list.
+     */
+    switch ( xsplice_work.cmd )
+    {
+    case XSPLICE_ACTION_APPLY:
+        rc = apply_payload(data);
+        if ( rc == 0 )
+            data->state = XSPLICE_STATE_APPLIED;
+        break;
+    case XSPLICE_ACTION_REVERT:
+        rc = revert_payload(data);
+        if ( rc == 0 )
+            data->state = XSPLICE_STATE_CHECKED;
+        break;
+    case XSPLICE_ACTION_REPLACE:
+        list_for_each_entry_safe_reverse ( data, tmp, &applied_list, applied_list )
+        {
+            data->rc = revert_payload(data);
+            if ( data->rc == 0 )
+                data->state = XSPLICE_STATE_CHECKED;
+            else
+            {
+                rc = -EINVAL;
+                break;
+            }
+        }
+        if ( rc != -EINVAL )
+        {
+            rc = apply_payload(xsplice_work.data);
+            if ( rc == 0 )
+                xsplice_work.data->state = XSPLICE_STATE_APPLIED;
+        }
+        break;
+    default:
+        rc = -EINVAL;
+        break;
+    }
+
+    xsplice_work.data->rc = rc;
+
+    local_irq_enable();
+    set_nmi_callback(saved_nmi_callback);
+
+    xsplice_work.do_work = 0;
+    smp_wmb(); /* Synchronize with waiting CPUs. */
+}
+
+/*
+ * The main function which manages the work of quiescing the system and
+ * patching code.
+ */
+void do_xsplice(void)
+{
+    struct payload *p = xsplice_work.data;
+    unsigned int cpu = smp_processor_id();
+
+    /* Fast path: no work to do. */
+    if ( likely(!xsplice_work.do_work) )
+        return;
+    ASSERT(local_irq_is_enabled());
+
+    /* Started at -1, so this counts up to num_online_cpus - 1. */
+    if ( atomic_inc_and_test(&xsplice_work.semaphore) )
+    {
+        unsigned int total_cpus;
+
+        if ( !get_cpu_maps() )
+        {
+            printk(XENLOG_DEBUG "%s: CPU%u - unable to get cpu_maps lock.\n",
+                   p->name, cpu);
+            xsplice_work.data->rc = -EBUSY;
+            xsplice_work.do_work = 0;
+            return;
+        }
+
+        barrier(); /* MUST do it after get_cpu_maps. */
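+        /*
+         * Our own increment already brought the semaphore to 0, so the
+         * target only needs to count the remaining online CPUs.
+         */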
+        total_cpus = num_online_cpus() - 1;
+
+        if ( total_cpus )
+        {
+            printk(XENLOG_DEBUG "%s: CPU%u - IPIing the other %u CPUs.\n", p->name,
+                   cpu, total_cpus);
+            smp_call_function(reschedule_fn, NULL, 0);
+        }
+        xsplice_do_single(total_cpus);
+
+        ASSERT(local_irq_is_enabled());
+
+        put_cpu_maps();
+
+        printk(XENLOG_DEBUG "%s finished with rc=%d\n", p->name, p->rc);
+    }
+    else
+    {
+        /* Wait for all CPUs to rendezvous. */
+        while ( xsplice_work.do_work && !xsplice_work.ready )
+        {
+            cpu_relax();
+            smp_rmb();
+        }
+
+        /* Disable IRQs and signal. */
+        local_irq_disable();
+        atomic_inc(&xsplice_work.irq_semaphore);
+
+        /* Wait for patching to complete. */
+        while ( xsplice_work.do_work )
+        {
+            cpu_relax();
+            smp_rmb();
+        }
+        local_irq_enable();
+    }
+}
+
 static int __init xsplice_init(void)
 {
     register_keyhandler('x', xsplice_printall, "print xsplicing info", 1);
diff --git a/xen/common/xsplice_elf.c b/xen/common/xsplice_elf.c
index 0717263..ad70797 100644
--- a/xen/common/xsplice_elf.c
+++ b/xen/common/xsplice_elf.c
@@ -228,18 +228,18 @@  int xsplice_elf_resolve_symbols(struct xsplice_elf *elf)
                 return_(-ENOENT);
                 break;
             case SHN_ABS:
-                printk(XENLOG_DEBUG "%s: Absolute symbol: %s => 0x%p\n",
+                printk(XENLOG_DEBUG "%s: Absolute symbol: %s => 0x%"PRIx64"\n",
                       elf->name, elf->sym[i].name,
-                      (void *)elf->sym[i].sym->st_value);
+                      elf->sym[i].sym->st_value);
                 break;
             default:
                 if ( elf->sec[elf->sym[i].sym->st_shndx].sec->sh_flags & SHF_ALLOC )
                 {
                     elf->sym[i].sym->st_value +=
                         (unsigned long)elf->sec[elf->sym[i].sym->st_shndx].load_addr;
-                    printk(XENLOG_DEBUG "%s: Symbol resolved: %s => 0x%p\n",
+                    printk(XENLOG_DEBUG "%s: Symbol resolved: %s => 0x%"PRIx64"\n",
                            elf->name, elf->sym[i].name,
-                           (void *)elf->sym[i].sym->st_value);
+                           elf->sym[i].sym->st_value);
                 }
         }
     }
diff --git a/xen/include/asm-arm/nmi.h b/xen/include/asm-arm/nmi.h
index a60587e..82aff35 100644
--- a/xen/include/asm-arm/nmi.h
+++ b/xen/include/asm-arm/nmi.h
@@ -4,6 +4,19 @@ 
 #define register_guest_nmi_callback(a)  (-ENOSYS)
 #define unregister_guest_nmi_callback() (-ENOSYS)
 
+typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
+
+/**
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI.  Only one handler may be set.  Returns the old
+ * NMI callback handler.  ARM has no NMIs, so this stub does nothing and
+ * returns NULL.
+ */
+static inline nmi_callback_t set_nmi_callback(nmi_callback_t callback)
+{
+    return NULL;
+}
+
 #endif /* ASM_NMI_H */
 /*
  * Local variables:
diff --git a/xen/include/xen/xsplice.h b/xen/include/xen/xsplice.h
index d71c898..d6db1c2 100644
--- a/xen/include/xen/xsplice.h
+++ b/xen/include/xen/xsplice.h
@@ -6,8 +6,26 @@  struct xsplice_elf_sec;
 struct xsplice_elf_sym;
 struct xen_sysctl_xsplice_op;
 
+#include <xen/elfstructs.h>
+/*
+ * The structure which defines the patching. This is what the hypervisor
+ * expects in the '.xsplice.funcs' section of the ELF file.
+ *
+ * This MUST be in sync with what the tools generate.
+ */
+struct xsplice_patch_func {
+    const char *name;
+    Elf64_Xword new_addr;
+    Elf64_Xword old_addr;
+    Elf64_Word new_size;
+    Elf64_Word old_size;
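+    /* Space to save the bytes clobbered by the patch jump, for revert. */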
+    uint8_t undo[8];
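+    /* Must be zero; pads the structure to 64 bytes on 64-bit builds. */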
+    uint8_t pad[24];
+};
+
 #ifdef CONFIG_XSPLICE
 int xsplice_control(struct xen_sysctl_xsplice_op *);
+void do_xsplice(void);
 
 /* Arch hooks */
 int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data);
@@ -17,11 +35,14 @@  int xsplice_perform_rel(struct xsplice_elf *elf,
 int xsplice_perform_rela(struct xsplice_elf *elf,
                          struct xsplice_elf_sec *base,
                          struct xsplice_elf_sec *rela);
+void xsplice_apply_jmp(struct xsplice_patch_func *func);
+void xsplice_revert_jmp(struct xsplice_patch_func *func);
 #else
 #include <xen/errno.h> /* For -ENOSYS */
 static inline int xsplice_control(struct xen_sysctl_xsplice_op *op)
 {
     return -ENOSYS;
 }
+static inline void do_xsplice(void) { }
 #endif
 #endif /* __XEN_XSPLICE_H__ */