
[v2,09/13] xsplice: Implement support for applying/reverting/replacing patches. (v2)

Message ID 1452808031-706-10-git-send-email-konrad.wilk@oracle.com (mailing list archive)
State New, archived

Commit Message

Konrad Rzeszutek Wilk Jan. 14, 2016, 9:47 p.m. UTC
From: Ross Lagerwall <ross.lagerwall@citrix.com>

Implement support for the apply, revert and replace actions.

To perform an action on a payload, the hypercall sets up a data
structure to schedule the work.  A hook is added in all the
return-to-guest paths to check for work to do and execute it if needed.
In this way, patches can be applied with all CPUs idle and without
stacks.  The first CPU to enter do_xsplice() becomes the master and raises
a reschedule softirq to force all the other CPUs into do_xsplice() with no
stack.  Once all CPUs have rendezvoused, they disable IRQs and NMIs are
ignored.  The system is then quiescent and the master performs the action.
After this, all CPUs enable IRQs and NMIs are re-enabled.

The action to perform is one of:
- APPLY: For each function in the module, store the first 5 bytes of the
  old function and replace them with a jump to the new function (sketched
  just after this list).
- REVERT: Copy the previously stored bytes into the first 5 bytes of the
  old function.
- REPLACE: Revert each applied module and then apply the new module.
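
For reference, this is roughly what APPLY and REVERT boil down to on x86
(condensed from the xen/arch/x86/xsplice.c hunk in the patch below); REPLACE
is simply revert_payload() on every applied payload followed by
apply_payload() on the new one:

    #define PATCH_INSN_SIZE 5

    void xsplice_apply_jmp(struct xsplice_patch_func *func)
    {
        uint32_t val;
        uint8_t *old_ptr = (uint8_t *)func->old_addr;

        /* Save the bytes we are about to clobber so REVERT can restore them. */
        memcpy(func->undo, old_ptr, PATCH_INSN_SIZE);

        /* e9 <rel32>: near jump from the end of the 5-byte insn to new_addr. */
        *old_ptr++ = 0xe9;
        val = func->new_addr - func->old_addr - PATCH_INSN_SIZE;
        memcpy(old_ptr, &val, sizeof(val));
    }

    void xsplice_revert_jmp(struct xsplice_patch_func *func)
    {
        /* Put back the original first 5 bytes of the old function. */
        memcpy((void *)func->old_addr, func->undo, PATCH_INSN_SIZE);
    }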

To prevent a deadlock with any other barrier in the system, the master
will wait for up to 30ms before timing out.  I've taken some
measurements and found the patch application to take about 100 µs on a
72 CPU system, whether idle or fully loaded.
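
The rendezvous itself, condensed from do_xsplice() in the xen/common/xsplice.c
hunk below (cpu_maps handling, logging and error paths omitted):

    void do_xsplice(void)    /* Called from every return-to-guest path. */
    {
        /* Fast path: no work to do. */
        if ( likely(!xsplice_work.do_work) )
            return;

        /* The semaphore starts at -1, so the first CPU to bring it to zero
         * becomes the master. */
        if ( atomic_inc_and_test(&xsplice_work.semaphore) )
        {
            /* Master: IPI the other CPUs into do_xsplice(), then patch. */
            smp_call_function(reschedule_fn, NULL, 0);
            xsplice_do_single(num_online_cpus() - 1);
        }
        else
        {
            /* Worker: wait for the master, disable IRQs, signal, and wait
             * for patching to complete. */
            while ( xsplice_work.do_work && !xsplice_work.ready )
                cpu_relax();

            local_irq_disable();
            atomic_inc(&xsplice_work.irq_semaphore);

            while ( xsplice_work.do_work )
                cpu_relax();
            local_irq_enable();
        }
    }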

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
--
 v2: - Pluck the 'struct xsplice_patch_func' in this patch.
     - Modify code per review comments.
     - Add more data in the keyhandler.
     - Redo the patching code, split it in functions.
---
 xen/arch/arm/xsplice.c      |  10 +-
 xen/arch/x86/domain.c       |   4 +
 xen/arch/x86/hvm/svm/svm.c  |   2 +
 xen/arch/x86/hvm/vmx/vmcs.c |   2 +
 xen/arch/x86/xsplice.c      |  19 +++
 xen/common/xsplice.c        | 389 ++++++++++++++++++++++++++++++++++++++++----
 xen/include/asm-arm/nmi.h   |  13 ++
 xen/include/xen/xsplice.h   |  24 +++
 8 files changed, 432 insertions(+), 31 deletions(-)

Comments

Ross Lagerwall Jan. 19, 2016, 2:39 p.m. UTC | #1
On 01/14/2016 09:47 PM, Konrad Rzeszutek Wilk wrote:
> From: Ross Lagerwall <ross.lagerwall@citrix.com>
>
> Implement support for the apply, revert and replace actions.
>
snip
> +#include <xen/cpu.h>
>   #include <xen/guest_access.h>
>   #include <xen/keyhandler.h>
>   #include <xen/lib.h>
> @@ -10,25 +11,38 @@
>   #include <xen/mm.h>
>   #include <xen/sched.h>
>   #include <xen/smp.h>
> +#include <xen/softirq.h>
>   #include <xen/spinlock.h>
> +#include <xen/wait.h>
>   #include <xen/xsplice_elf.h>
>   #include <xen/xsplice.h>
>
>   #include <asm/event.h>
> +#include <asm/nmi.h>
>   #include <public/sysctl.h>
>
> -static DEFINE_SPINLOCK(payload_list_lock);
> +/*
> + * Protects against payload_list operations and also allows only one
> + * caller in schedule_work.
> + */
> +static DEFINE_SPINLOCK(payload_lock);

I think it would be cleaner if all the payload_list_lock changes were 
folded into Patch 3.

>   static LIST_HEAD(payload_list);
>
> +static LIST_HEAD(applied_list);
> +
>   static unsigned int payload_cnt;
>   static unsigned int payload_version = 1;
>
>   struct payload {
>       int32_t state;                       /* One of the XSPLICE_STATE_*. */
>       int32_t rc;                          /* 0 or -XEN_EXX. */
> +    uint32_t timeout;                    /* Timeout to do the operation. */

This should go into struct xsplice_work.
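
A sketch of what that could look like (illustrative only; the field placement
is an assumption, the other fields are as in the patch):

    /* Defines an outstanding patching action. */
    struct xsplice_work
    {
        atomic_t semaphore;          /* Used for rendezvous. */
        atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
        struct payload *data;        /* The payload on which to act. */
        volatile bool_t do_work;     /* Signals work to do. */
        volatile bool_t ready;       /* Signals all CPUs synchronized. */
        uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
        uint32_t timeout;            /* Timeout to do the operation
                                        (moved here from struct payload). */
    };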

>       struct list_head list;               /* Linked to 'payload_list'. */
>       void *payload_address;               /* Virtual address mapped. */
>       size_t payload_pages;                /* Nr of the pages. */
> +    struct list_head applied_list;       /* Linked to 'applied_list'. */
> +    struct xsplice_patch_func *funcs;    /* The array of functions to patch. */
> +    unsigned int nfuncs;                 /* Nr of functions to patch. */
>
>       char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
>   };
> @@ -36,6 +50,23 @@ struct payload {
>   static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len);
>   static void free_payload_data(struct payload *payload);
>
> +/* Defines an outstanding patching action. */
> +struct xsplice_work
> +{
> +    atomic_t semaphore;          /* Used for rendezvous. First to grab it will
> +                                    do the patching. */
> +    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
> +    struct payload *data;        /* The payload on which to act. */
> +    volatile bool_t do_work;     /* Signals work to do. */
> +    volatile bool_t ready;       /* Signals all CPUs synchronized. */
> +    uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
> +};
> +
> +/* There can be only one outstanding patching action. */
> +static struct xsplice_work xsplice_work;
> +
> +static int schedule_work(struct payload *data, uint32_t cmd);
> +
snip
> +
> +/*
> + * This function is executed having all other CPUs with no stack (we may
> + * have cpu_idle on it) and IRQs disabled.
> + */
> +static int revert_payload(struct payload *data)
> +{
> +    unsigned int i;
> +
> +    printk(XENLOG_DEBUG "%s: Reverting.\n", data->name);
> +
> +    for ( i = 0; i < data->nfuncs; i++ )
> +        xsplice_revert_jmp(data->funcs + i);
> +
> +    list_del(&data->applied_list);
> +
> +    return 0;
> +}
> +
> +/* Must be holding the payload_list lock. */

payload lock?

> +static int schedule_work(struct payload *data, uint32_t cmd)
> +{
> +    /* Fail if an operation is already scheduled. */
> +    if ( xsplice_work.do_work )
> +        return -EAGAIN;

Hmm, I don't think EAGAIN is correct. It will cause xen-xsplice to poll 
for a status update, but the operation hasn't actually been submitted.

> +
> +    xsplice_work.cmd = cmd;
> +    xsplice_work.data = data;
> +    atomic_set(&xsplice_work.semaphore, -1);
> +    atomic_set(&xsplice_work.irq_semaphore, -1);
> +
> +    xsplice_work.ready = 0;
> +    smp_wmb();
> +    xsplice_work.do_work = 1;
> +    smp_wmb();
> +
> +    return 0;
> +}
> +
> +/*
> + * Note that because of this NOP code the do_nmi is not safely patchable.
> + * Also if we do receive 'real' NMIs we have lost them.
> + */
> +static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
> +{
> +    return 1;
> +}
> +
> +static void reschedule_fn(void *unused)
> +{
> +    smp_mb(); /* Synchronize with setting do_work */
> +    raise_softirq(SCHEDULE_SOFTIRQ);
> +}
> +
> +static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
> +                           unsigned int total_cpus, const char *s)
> +{
> +    int rc = 0;
> +
> +    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
> +        cpu_relax();
> +
> +    /* Log & abort. */
> +    if ( atomic_read(counter) != total_cpus )
> +    {
> +        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
> +               s, atomic_read(counter), total_cpus);
> +        rc = -EBUSY;
> +        xsplice_work.data->rc = rc;
> +        xsplice_work.do_work = 0;
> +        smp_wmb();
> +        return rc;
> +    }
> +    return rc;
> +}
> +
> +static void xsplice_do_single(unsigned int total_cpus)
> +{
> +    nmi_callback_t saved_nmi_callback;
> +    s_time_t timeout;
> +    struct payload *data, *tmp;
> +    int rc;
> +
> +    data = xsplice_work.data;
> +    timeout = data->timeout ? data->timeout : MILLISECS(30);

The design doc says that a timeout of 0 means infinity.
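
One illustrative way to honor that (a hypothetical change, assuming STIME_MAX
from xen/time.h as the "no timeout" bound):

    /* Per the design doc, a timeout of 0 means "wait forever". */
    if ( data->timeout )
        timeout = NOW() + data->timeout;
    else
        timeout = STIME_MAX;
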

> +    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
> +           timeout / MILLISECS(1));
> +
> +    timeout += NOW();
> +
> +    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
> +                         "Timed out on CPU semaphore") )
> +        return;
> +
> +    /* "Mask" NMIs. */
> +    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
> +
> +    /* All CPUs are waiting, now signal to disable IRQs. */
> +    xsplice_work.ready = 1;
> +    smp_wmb();
> +
> +    atomic_inc(&xsplice_work.irq_semaphore);
> +    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
> +                         "Timed out on IRQ semaphore.") )
> +        return;
> +
> +    local_irq_disable();

As far as I can tell, the mechanics of how this works haven't changed, 
the code has just been reorganized. Which means the points that Martin 
raised about this mechanism are still outstanding.
Konrad Rzeszutek Wilk Jan. 19, 2016, 4:55 p.m. UTC | #2
On Tue, Jan 19, 2016 at 02:39:40PM +0000, Ross Lagerwall wrote:
> On 01/14/2016 09:47 PM, Konrad Rzeszutek Wilk wrote:
> >From: Ross Lagerwall <ross.lagerwall@citrix.com>
> >
> >Implement support for the apply, revert and replace actions.
> >
> snip
> >+#include <xen/cpu.h>
> >  #include <xen/guest_access.h>
> >  #include <xen/keyhandler.h>
> >  #include <xen/lib.h>
> >@@ -10,25 +11,38 @@
> >  #include <xen/mm.h>
> >  #include <xen/sched.h>
> >  #include <xen/smp.h>
> >+#include <xen/softirq.h>
> >  #include <xen/spinlock.h>
> >+#include <xen/wait.h>
> >  #include <xen/xsplice_elf.h>
> >  #include <xen/xsplice.h>
> >
> >  #include <asm/event.h>
> >+#include <asm/nmi.h>
> >  #include <public/sysctl.h>
> >
> >-static DEFINE_SPINLOCK(payload_list_lock);
> >+/*
> >+ * Protects against payload_list operations and also allows only one
> >+ * caller in schedule_work.
> >+ */
> >+static DEFINE_SPINLOCK(payload_lock);
> 
> I think it would be cleaner if all the payload_list_lock changes were folded
> into Patch 3.

Good idea.
> 
> >  static LIST_HEAD(payload_list);
> >
> >+static LIST_HEAD(applied_list);
> >+
> >  static unsigned int payload_cnt;
> >  static unsigned int payload_version = 1;
> >
> >  struct payload {
> >      int32_t state;                       /* One of the XSPLICE_STATE_*. */
> >      int32_t rc;                          /* 0 or -XEN_EXX. */
> >+    uint32_t timeout;                    /* Timeout to do the operation. */
> 
> This should go into struct xsplice_work.

/me nods.
> 
> >      struct list_head list;               /* Linked to 'payload_list'. */
> >      void *payload_address;               /* Virtual address mapped. */
> >      size_t payload_pages;                /* Nr of the pages. */
> >+    struct list_head applied_list;       /* Linked to 'applied_list'. */
> >+    struct xsplice_patch_func *funcs;    /* The array of functions to patch. */
> >+    unsigned int nfuncs;                 /* Nr of functions to patch. */
> >
> >      char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
> >  };
> >@@ -36,6 +50,23 @@ struct payload {
> >  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len);
> >  static void free_payload_data(struct payload *payload);
> >
> >+/* Defines an outstanding patching action. */
> >+struct xsplice_work
> >+{
> >+    atomic_t semaphore;          /* Used for rendezvous. First to grab it will
> >+                                    do the patching. */
> >+    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
> >+    struct payload *data;        /* The payload on which to act. */
> >+    volatile bool_t do_work;     /* Signals work to do. */
> >+    volatile bool_t ready;       /* Signals all CPUs synchronized. */
> >+    uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
> >+};
> >+
> >+/* There can be only one outstanding patching action. */
> >+static struct xsplice_work xsplice_work;
> >+
> >+static int schedule_work(struct payload *data, uint32_t cmd);
> >+
> snip
> >+
> >+/*
> >+ * This function is executed having all other CPUs with no stack (we may
> >+ * have cpu_idle on it) and IRQs disabled.
> >+ */
> >+static int revert_payload(struct payload *data)
> >+{
> >+    unsigned int i;
> >+
> >+    printk(XENLOG_DEBUG "%s: Reverting.\n", data->name);
> >+
> >+    for ( i = 0; i < data->nfuncs; i++ )
> >+        xsplice_revert_jmp(data->funcs + i);
> >+
> >+    list_del(&data->applied_list);
> >+
> >+    return 0;
> >+}
> >+
> >+/* Must be holding the payload_list lock. */
> 
> payload lock?
> 
> >+static int schedule_work(struct payload *data, uint32_t cmd)
> >+{
> >+    /* Fail if an operation is already scheduled. */
> >+    if ( xsplice_work.do_work )
> >+        return -EAGAIN;
> 
> Hmm, I don't think EAGAIN is correct. It will cause xen-xsplice to poll for
> a status update, but the operation hasn't actually been submitted.

-EBUSY -EDEADLK ?
> 
> >+
> >+    xsplice_work.cmd = cmd;
> >+    xsplice_work.data = data;
> >+    atomic_set(&xsplice_work.semaphore, -1);
> >+    atomic_set(&xsplice_work.irq_semaphore, -1);
> >+
> >+    xsplice_work.ready = 0;
> >+    smp_wmb();
> >+    xsplice_work.do_work = 1;
> >+    smp_wmb();
> >+
> >+    return 0;
> >+}
> >+
> >+/*
> >+ * Note that because of this NOP code the do_nmi is not safely patchable.
> >+ * Also if we do receive 'real' NMIs we have lost them.
> >+ */
> >+static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
> >+{
> >+    return 1;
> >+}
> >+
> >+static void reschedule_fn(void *unused)
> >+{
> >+    smp_mb(); /* Synchronize with setting do_work */
> >+    raise_softirq(SCHEDULE_SOFTIRQ);
> >+}
> >+
> >+static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
> >+                           unsigned int total_cpus, const char *s)
> >+{
> >+    int rc = 0;
> >+
> >+    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
> >+        cpu_relax();
> >+
> >+    /* Log & abort. */
> >+    if ( atomic_read(counter) != total_cpus )
> >+    {
> >+        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
> >+               s, atomic_read(counter), total_cpus);
> >+        rc = -EBUSY;
> >+        xsplice_work.data->rc = rc;
> >+        xsplice_work.do_work = 0;
> >+        smp_wmb();
> >+        return rc;
> >+    }
> >+    return rc;
> >+}
> >+
> >+static void xsplice_do_single(unsigned int total_cpus)
> >+{
> >+    nmi_callback_t saved_nmi_callback;
> >+    s_time_t timeout;
> >+    struct payload *data, *tmp;
> >+    int rc;
> >+
> >+    data = xsplice_work.data;
> >+    timeout = data->timeout ? data->timeout : MILLISECS(30);
> 
> The design doc says that a timeout of 0 means infinity.

True. Let's update the document.
> 
> >+    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
> >+           timeout / MILLISECS(1));
> >+
> >+    timeout += NOW();
> >+
> >+    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
> >+                         "Timed out on CPU semaphore") )
> >+        return;
> >+
> >+    /* "Mask" NMIs. */
> >+    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
> >+
> >+    /* All CPUs are waiting, now signal to disable IRQs. */
> >+    xsplice_work.ready = 1;
> >+    smp_wmb();
> >+
> >+    atomic_inc(&xsplice_work.irq_semaphore);
> >+    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
> >+                         "Timed out on IRQ semaphore.") )
> >+        return;
> >+
> >+    local_irq_disable();
> 
> As far as I can tell, the mechanics of how this works haven't changed, the
> code has just been reorganized. Which means the points that Martin raised
> about this mechanism are still outstanding.

A bit. I added the extra timeout on both of the 'spin-around' loops and
also moved some of the barriers around. Also removed your spin-lock and
used the atomic_t mechanism to synchronize.

But the one thing that I didn't do was the spin on the 'workers' that
are just spinning idly. They will do that forever if, say, the 'master'
hasn't gone to the IRQ semaphore part.

My thinking was that the 'workers' could also use the timeout feature
but just multiply it by two?
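
Purely as a sketch of that idea for the worker branch of do_xsplice()
(p is xsplice_work.data as in the patch; the deadline variable and the
factor of two are assumptions):

    /* Bound the worker spin at twice the master's timeout. */
    s_time_t deadline = NOW() +
        2 * (p->timeout ? p->timeout : MILLISECS(30));

    while ( xsplice_work.do_work && !xsplice_work.ready )
    {
        if ( NOW() >= deadline )
            return;              /* Give up; the master will time out too. */
        cpu_relax();
        smp_rmb();
    }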

> 
> -- 
> Ross Lagerwall
Ross Lagerwall Jan. 25, 2016, 11:43 a.m. UTC | #3
On 01/19/2016 04:55 PM, Konrad Rzeszutek Wilk wrote:
snip
>>> +/* Must be holding the payload_list lock. */
>>
>> payload lock?
>>
>>> +static int schedule_work(struct payload *data, uint32_t cmd)
>>> +{
>>> +    /* Fail if an operation is already scheduled. */
>>> +    if ( xsplice_work.do_work )
>>> +        return -EAGAIN;
>>
>> Hmm, I don't think EAGAIN is correct. It will cause xen-xsplice to poll for
>> a status update, but the operation hasn't actually been submitted.
>
> -EBUSY -EDEADLK ?

I would choose -EBUSY.

>>
>>> +
>>> +    xsplice_work.cmd = cmd;
>>> +    xsplice_work.data = data;
>>> +    atomic_set(&xsplice_work.semaphore, -1);
>>> +    atomic_set(&xsplice_work.irq_semaphore, -1);
>>> +
>>> +    xsplice_work.ready = 0;
>>> +    smp_wmb();
>>> +    xsplice_work.do_work = 1;
>>> +    smp_wmb();
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +/*
>>> + * Note that because of this NOP code the do_nmi is not safely patchable.
>>> + * Also if we do receive 'real' NMIs we have lost them.
>>> + */
>>> +static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
>>> +{
>>> +    return 1;
>>> +}
>>> +
>>> +static void reschedule_fn(void *unused)
>>> +{
>>> +    smp_mb(); /* Synchronize with setting do_work */
>>> +    raise_softirq(SCHEDULE_SOFTIRQ);
>>> +}
>>> +
>>> +static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
>>> +                           unsigned int total_cpus, const char *s)
>>> +{
>>> +    int rc = 0;
>>> +
>>> +    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
>>> +        cpu_relax();
>>> +
>>> +    /* Log & abort. */
>>> +    if ( atomic_read(counter) != total_cpus )
>>> +    {
>>> +        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
>>> +               s, atomic_read(counter), total_cpus);
>>> +        rc = -EBUSY;
>>> +        xsplice_work.data->rc = rc;
>>> +        xsplice_work.do_work = 0;
>>> +        smp_wmb();
>>> +        return rc;
>>> +    }
>>> +    return rc;
>>> +}
>>> +
>>> +static void xsplice_do_single(unsigned int total_cpus)
>>> +{
>>> +    nmi_callback_t saved_nmi_callback;
>>> +    s_time_t timeout;
>>> +    struct payload *data, *tmp;
>>> +    int rc;
>>> +
>>> +    data = xsplice_work.data;
>>> +    timeout = data->timeout ? data->timeout : MILLISECS(30);
>>
>> The design doc says that a timeout of 0 means infinity.
>
> True. Let's update the document.
>>
>>> +    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
>>> +           timeout / MILLISECS(1));
>>> +
>>> +    timeout += NOW();
>>> +
>>> +    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
>>> +                         "Timed out on CPU semaphore") )
>>> +        return;
>>> +
>>> +    /* "Mask" NMIs. */
>>> +    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
>>> +
>>> +    /* All CPUs are waiting, now signal to disable IRQs. */
>>> +    xsplice_work.ready = 1;
>>> +    smp_wmb();
>>> +
>>> +    atomic_inc(&xsplice_work.irq_semaphore);
>>> +    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
>>> +                         "Timed out on IRQ semaphore.") )
>>> +        return;
>>> +
>>> +    local_irq_disable();
>>
>> As far as I can tell, the mechanics of how this works haven't changed, the
>> code has just been reorganized. Which means the points that Martin raised
>> about this mechanism are still outstanding.
>
> A bit. I added the extra timeout on both of the 'spin-around' loops and
> also moved some of the barriers around. Also removed your spin-lock and
> used the atomic_t mechanism to synchronize.
>
> But the one thing that I didn't do was the spin on the 'workers' that
> are just spinning idly. They will do that forever if, say, the 'master'
> hasn't gone to the IRQ semaphore part.
>
> My thinking was that the 'workers' could also use the timeout feature
> but just multiply it by two?
>

After looking at this again, I remembered that the algorithm I used is 
the same as the one used by stop_machine_run(). That function runs 
without timeouts at all (seemingly without problems), so why shouldn't 
this one? (The only reason stop_machine_run() itself isn't used for 
patching is because we need to enter the function without a stack, i.e. 
not from a tasklet.)
Konrad Rzeszutek Wilk Feb. 5, 2016, 7:30 p.m. UTC | #4
> >>As far as I can tell, the mechanics of how this works haven't changed, the
> >>code has just been reorganized. Which means the points that Martin raised
> >>about this mechanism are still outstanding.
> >
> >A bit. I added the extra timeout on both of the 'spin-around' loops and
> >also moved some of the barriers around. Also removed your spin-lock and
> >used the atomic_t mechanism to synchronize.
> >
> >But the one thing that I didn't do was the spin on the 'workers' that
> >are just spinning idly. They will do that forever if, say, the 'master'
> >hasn't gone to the IRQ semaphore part.
> >
> >My thinking was that the 'workers' could also use the timeout feature
> >but just multiply it by two?
> >
> 
> After looking at this again, I remembered that the algorithm I used is the
> same as the one used by stop_machine_run(). That function runs without
> timeouts at all (seemingly without problems), so why shouldn't this one?

Because we may have a very busy system and we do not want to impair
the running guests.

> (The only reason stop_machine_run() itself isn't used for patching is
> because we need to enter the function without a stack, i.e. not from a
> tasklet.)
> 
> -- 
> Ross Lagerwall

Patch

diff --git a/xen/arch/arm/xsplice.c b/xen/arch/arm/xsplice.c
index 8d85fa9..06f6875 100644
--- a/xen/arch/arm/xsplice.c
+++ b/xen/arch/arm/xsplice.c
@@ -3,7 +3,15 @@ 
 #include <xen/xsplice_elf.h>
 #include <xen/xsplice.h>
 
-int xsplice_verify_elf(uint8_t *data, ssize_t len)
+void xsplice_apply_jmp(struct xsplice_patch_func *func)
+{
+}
+
+void xsplice_revert_jmp(struct xsplice_patch_func *func)
+{
+}
+
+int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data)
 {
     return -ENOSYS;
 }
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e70c125..03ac0d7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -36,6 +36,7 @@ 
 #include <xen/cpu.h>
 #include <xen/wait.h>
 #include <xen/guest_access.h>
+#include <xen/xsplice.h>
 #include <public/sysctl.h>
 #include <public/hvm/hvm_vcpu.h>
 #include <asm/regs.h>
@@ -121,6 +122,7 @@  static void idle_loop(void)
         (*pm_idle)();
         do_tasklet();
         do_softirq();
+        do_xsplice(); /* Must be last. */
     }
 }
 
@@ -137,6 +139,7 @@  void startup_cpu_idle_loop(void)
 
 static void noreturn continue_idle_domain(struct vcpu *v)
 {
+    do_xsplice();
     reset_stack_and_jump(idle_loop);
 }
 
@@ -144,6 +147,7 @@  static void noreturn continue_nonidle_domain(struct vcpu *v)
 {
     check_wakeup_from_wait();
     mark_regs_dirty(guest_cpu_user_regs());
+    do_xsplice();
     reset_stack_and_jump(ret_from_intr);
 }
 
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index a66d854..3ea5b97 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -26,6 +26,7 @@ 
 #include <xen/hypercall.h>
 #include <xen/domain_page.h>
 #include <xen/xenoprof.h>
+#include <xen/xsplice.h>
 #include <asm/current.h>
 #include <asm/io.h>
 #include <asm/paging.h>
@@ -1096,6 +1097,7 @@  static void noreturn svm_do_resume(struct vcpu *v)
 
     hvm_do_resume(v);
 
+    do_xsplice();
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 5bc3c74..1008163 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -25,6 +25,7 @@ 
 #include <xen/kernel.h>
 #include <xen/keyhandler.h>
 #include <xen/vm_event.h>
+#include <xen/xsplice.h>
 #include <asm/current.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -1716,6 +1717,7 @@  void vmx_do_resume(struct vcpu *v)
     }
 
     hvm_do_resume(v);
+    do_xsplice();
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
diff --git a/xen/arch/x86/xsplice.c b/xen/arch/x86/xsplice.c
index 7b13511..012ddfe 100644
--- a/xen/arch/x86/xsplice.c
+++ b/xen/arch/x86/xsplice.c
@@ -3,6 +3,25 @@ 
 #include <xen/xsplice_elf.h>
 #include <xen/xsplice.h>
 
+#define PATCH_INSN_SIZE 5
+
+void xsplice_apply_jmp(struct xsplice_patch_func *func)
+{
+    uint32_t val;
+    uint8_t *old_ptr;
+
+    old_ptr = (uint8_t *)func->old_addr;
+    memcpy(func->undo, old_ptr, PATCH_INSN_SIZE);
+    *old_ptr++ = 0xe9; /* Relative jump */
+    val = func->new_addr - func->old_addr - PATCH_INSN_SIZE;
+    memcpy(old_ptr, &val, sizeof val);
+}
+
+void xsplice_revert_jmp(struct xsplice_patch_func *func)
+{
+    memcpy((void *)func->old_addr, func->undo, PATCH_INSN_SIZE);
+}
+
 int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data)
 {
 
diff --git a/xen/common/xsplice.c b/xen/common/xsplice.c
index 67f6fc7..5abeb28 100644
--- a/xen/common/xsplice.c
+++ b/xen/common/xsplice.c
@@ -3,6 +3,7 @@ 
  *
  */
 
+#include <xen/cpu.h>
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
 #include <xen/lib.h>
@@ -10,25 +11,38 @@ 
 #include <xen/mm.h>
 #include <xen/sched.h>
 #include <xen/smp.h>
+#include <xen/softirq.h>
 #include <xen/spinlock.h>
+#include <xen/wait.h>
 #include <xen/xsplice_elf.h>
 #include <xen/xsplice.h>
 
 #include <asm/event.h>
+#include <asm/nmi.h>
 #include <public/sysctl.h>
 
-static DEFINE_SPINLOCK(payload_list_lock);
+/*
+ * Protects against payload_list operations and also allows only one
+ * caller in schedule_work.
+ */
+static DEFINE_SPINLOCK(payload_lock);
 static LIST_HEAD(payload_list);
 
+static LIST_HEAD(applied_list);
+
 static unsigned int payload_cnt;
 static unsigned int payload_version = 1;
 
 struct payload {
     int32_t state;                       /* One of the XSPLICE_STATE_*. */
     int32_t rc;                          /* 0 or -XEN_EXX. */
+    uint32_t timeout;                    /* Timeout to do the operation. */
     struct list_head list;               /* Linked to 'payload_list'. */
     void *payload_address;               /* Virtual address mapped. */
     size_t payload_pages;                /* Nr of the pages. */
+    struct list_head applied_list;       /* Linked to 'applied_list'. */
+    struct xsplice_patch_func *funcs;    /* The array of functions to patch. */
+    unsigned int nfuncs;                 /* Nr of functions to patch. */
 
     char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
 };
@@ -36,6 +50,23 @@  struct payload {
 static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len);
 static void free_payload_data(struct payload *payload);
 
+/* Defines an outstanding patching action. */
+struct xsplice_work
+{
+    atomic_t semaphore;          /* Used for rendezvous. First to grab it will
+                                    do the patching. */
+    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled. */
+    struct payload *data;        /* The payload on which to act. */
+    volatile bool_t do_work;     /* Signals work to do. */
+    volatile bool_t ready;       /* Signals all CPUs synchronized. */
+    uint32_t cmd;                /* Action request: XSPLICE_ACTION_* */
+};
+
+/* There can be only one outstanding patching action. */
+static struct xsplice_work xsplice_work;
+
+static int schedule_work(struct payload *data, uint32_t cmd);
+
 static const char *state2str(int32_t state)
 {
 #define STATE(x) [XSPLICE_STATE_##x] = #x
@@ -61,15 +92,24 @@  static const char *state2str(int32_t state)
 static void xsplice_printall(unsigned char key)
 {
     struct payload *data;
+    unsigned int i;
 
-    spin_lock(&payload_list_lock);
+    spin_lock(&payload_lock);
 
     list_for_each_entry ( data, &payload_list, list )
-        printk(" name=%s state=%s(%d) %p using %zu pages.\n", data->name,
+    {
+        printk(" name=%s state=%s(%d) %p using %zu pages:\n", data->name,
                state2str(data->state), data->state, data->payload_address,
                data->payload_pages);
 
-    spin_unlock(&payload_list_lock);
+        for ( i = 0; i < data->nfuncs; i ++ )
+        {
+            struct xsplice_patch_func *f = &(data->funcs[i]);
+            printk("    %s patch 0x%lx(%u) with 0x%lx(%u)\n",
+                   f->name, f->old_addr, f->old_size, f->new_addr, f->new_size);
+        }
+    }
+    spin_unlock(&payload_lock);
 }
 
 static int verify_name(xen_xsplice_name_t *name)
@@ -103,7 +143,7 @@  static int find_payload(xen_xsplice_name_t *name, bool_t need_lock,
         return -EFAULT;
 
     if ( need_lock )
-        spin_lock(&payload_list_lock);
+        spin_lock(&payload_lock);
 
     rc = -ENOENT;
     list_for_each_entry ( data, &payload_list, list )
@@ -117,7 +157,7 @@  static int find_payload(xen_xsplice_name_t *name, bool_t need_lock,
     }
 
     if ( need_lock )
-        spin_unlock(&payload_list_lock);
+        spin_unlock(&payload_lock);
 
     return rc;
 }
@@ -137,7 +177,7 @@  static int verify_payload(xen_sysctl_xsplice_upload_t *upload)
 }
 
 /*
- * We MUST be holding the payload_list_lock spinlock.
+ * We MUST be holding the payload_lock spinlock.
  */
 static void free_payload(struct payload *data)
 {
@@ -191,11 +231,11 @@  static int xsplice_upload(xen_sysctl_xsplice_upload_t *upload)
     data->rc = 0;
     INIT_LIST_HEAD(&data->list);
 
-    spin_lock(&payload_list_lock);
+    spin_lock(&payload_lock);
     list_add_tail(&data->list, &payload_list);
     payload_cnt++;
     payload_version++;
-    spin_unlock(&payload_list_lock);
+    spin_unlock(&payload_lock);
 
     free_xenheap_pages(raw_data, get_order_from_bytes(upload->size));
     return 0;
@@ -250,10 +290,10 @@  static int xsplice_list(xen_sysctl_xsplice_list_t *list)
          !guest_handle_okay(list->len, sizeof(uint32_t) * list->nr) )
         return -EINVAL;
 
-    spin_lock(&payload_list_lock);
+    spin_lock(&payload_lock);
     if ( list->idx > payload_cnt || !list->nr )
     {
-        spin_unlock(&payload_list_lock);
+        spin_unlock(&payload_lock);
         return -EINVAL;
     }
 
@@ -283,7 +323,7 @@  static int xsplice_list(xen_sysctl_xsplice_list_t *list)
     }
     list->nr = payload_cnt - i; /* Remaining amount. */
     list->version = payload_version;
-    spin_unlock(&payload_list_lock);
+    spin_unlock(&payload_lock);
 
     /* And how many we have processed. */
     return rc ? : idx;
@@ -298,7 +338,7 @@  static int xsplice_action(xen_sysctl_xsplice_action_t *action)
     if ( rc )
         return rc;
 
-    spin_lock(&payload_list_lock);
+    spin_lock(&payload_lock);
     rc = find_payload(&action->name, 0 /* We are holding the lock. */, &data);
     if ( rc )
         goto out;
@@ -327,28 +367,25 @@  static int xsplice_action(xen_sysctl_xsplice_action_t *action)
     case XSPLICE_ACTION_REVERT:
         if ( data->state == XSPLICE_STATE_APPLIED )
         {
-            /* No implementation yet. */
-            data->state = XSPLICE_STATE_CHECKED;
-            data->rc = 0;
-            rc = 0;
+            data->rc = -EAGAIN;
+            data->timeout = action->timeout;
+            rc = schedule_work(data, action->cmd);
         }
         break;
     case XSPLICE_ACTION_APPLY:
         if ( (data->state == XSPLICE_STATE_CHECKED) )
         {
-            /* No implementation yet. */
-            data->state = XSPLICE_STATE_APPLIED;
-            data->rc = 0;
-            rc = 0;
+            data->rc = -EAGAIN;
+            data->timeout = action->timeout;
+            rc = schedule_work(data, action->cmd);
         }
         break;
     case XSPLICE_ACTION_REPLACE:
         if ( data->state == XSPLICE_STATE_CHECKED )
         {
-            /* No implementation yet. */
-            data->state = XSPLICE_STATE_CHECKED;
-            data->rc = 0;
-            rc = 0;
+            data->rc = -EAGAIN;
+            data->timeout = action->timeout;
+            rc = schedule_work(data, action->cmd);
         }
         break;
     default:
@@ -357,7 +394,7 @@  static int xsplice_action(xen_sysctl_xsplice_action_t *action)
     }
 
  out:
-    spin_unlock(&payload_list_lock);
+    spin_unlock(&payload_lock);
 
     return rc;
 }
@@ -391,12 +428,13 @@  int xsplice_control(xen_sysctl_xsplice_op_t *xsplice)
     return rc;
 }
 
+#ifdef CONFIG_X86
 static void find_hole(ssize_t pages, unsigned long *hole_start,
                       unsigned long *hole_end)
 {
     struct payload *data, *data2;
 
-    spin_lock(&payload_list_lock);
+    spin_lock(&payload_lock);
     list_for_each_entry ( data, &payload_list, list )
     {
         list_for_each_entry ( data2, &payload_list, list )
@@ -415,7 +453,7 @@  static void find_hole(ssize_t pages, unsigned long *hole_start,
         if ( &data2->list == &payload_list )
             break;
     }
-    spin_unlock(&payload_list_lock);
+    spin_unlock(&payload_lock);
 }
 
 /*
@@ -423,7 +461,6 @@  static void find_hole(ssize_t pages, unsigned long *hole_start,
  * allocating space, loading the allocated sections, resolving symbols,
  * performing relocations, etc.
  */
-#ifdef CONFIG_X86
 static void *alloc_payload(size_t size)
 {
     mfn_t *mfn, *mfn_ptr;
@@ -572,6 +609,47 @@  static int move_payload(struct payload *payload, struct xsplice_elf *elf)
     return 0;
 }
 
+static int find_special_sections(struct payload *payload,
+                                 struct xsplice_elf *elf)
+{
+    struct xsplice_elf_sec *sec;
+    unsigned int i;
+    struct xsplice_patch_func *f;
+
+    sec = xsplice_elf_sec_by_name(elf, ".xsplice.funcs");
+    if ( !sec )
+    {
+        printk(XENLOG_ERR "%s: .xsplice.funcs is missing!\n", elf->name);
+        return -EINVAL;
+    }
+
+    if ( ( !sec->sec->sh_size ) ||
+         ( sec->sec->sh_size % sizeof *payload->funcs ) )
+        return -EINVAL;
+
+    payload->funcs = (struct xsplice_patch_func *)sec->load_addr;
+    payload->nfuncs = sec->sec->sh_size / (sizeof *payload->funcs);
+
+    for ( i = 0; i < payload->nfuncs; i++ )
+    {
+        unsigned int j;
+
+        f = &(payload->funcs[i]);
+
+        if ( !f->new_addr || !f->old_addr || !f->old_size || !f->new_size )
+            return -EINVAL;
+
+        for ( j = 0; j < 8; j ++ )
+            if ( f->undo[j] )
+                return -EINVAL;
+
+        for ( j = 0; j < 24; j ++ )
+            if ( f->pad[j] )
+                return -EINVAL;
+    }
+    return 0;
+}
+
 static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
 {
     struct xsplice_elf elf;
@@ -601,7 +679,10 @@  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
     if ( rc )
         goto err_payload;
 
-    /* Free our temporary data structure. */
+    rc = find_special_sections(payload, &elf);
+    if ( rc )
+        goto err_payload;
+
     xsplice_elf_free(&elf);
     return 0;
 
@@ -613,6 +694,254 @@  static int load_payload_data(struct payload *payload, uint8_t *raw, ssize_t len)
     return rc;
 }
 
+
+/*
+ * The following functions get the CPUs into an appropriate state and
+ * apply (or revert) each of the module's functions.
+ */
+
+/*
+ * This function is executed having all other CPUs with no stack (we may
+ * have cpu_idle on it) and IRQs disabled. We guard against NMI by temporarily
+ * installing our NOP NMI handler.
+ */
+static int apply_payload(struct payload *data)
+{
+    unsigned int i;
+
+    printk(XENLOG_DEBUG "%s: Applying %u functions.\n", data->name,
+           data->nfuncs);
+
+    for ( i = 0; i < data->nfuncs; i++ )
+        xsplice_apply_jmp(data->funcs + i);
+
+    list_add_tail(&data->applied_list, &applied_list);
+
+    return 0;
+}
+
+/*
+ * This function is executed having all other CPUs with no stack (we may
+ * have cpu_idle on it) and IRQs disabled.
+ */
+static int revert_payload(struct payload *data)
+{
+    unsigned int i;
+
+    printk(XENLOG_DEBUG "%s: Reverting.\n", data->name);
+
+    for ( i = 0; i < data->nfuncs; i++ )
+        xsplice_revert_jmp(data->funcs + i);
+
+    list_del(&data->applied_list);
+
+    return 0;
+}
+
+/* Must be holding the payload_list lock. */
+static int schedule_work(struct payload *data, uint32_t cmd)
+{
+    /* Fail if an operation is already scheduled. */
+    if ( xsplice_work.do_work )
+        return -EAGAIN;
+
+    xsplice_work.cmd = cmd;
+    xsplice_work.data = data;
+    atomic_set(&xsplice_work.semaphore, -1);
+    atomic_set(&xsplice_work.irq_semaphore, -1);
+
+    xsplice_work.ready = 0;
+    smp_wmb();
+    xsplice_work.do_work = 1;
+    smp_wmb();
+
+    return 0;
+}
+
+/*
+ * Note that because of this NOP code the do_nmi is not safely patchable.
+ * Also if we do receive 'real' NMIs we have lost them.
+ */
+static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
+{
+    return 1;
+}
+
+static void reschedule_fn(void *unused)
+{
+    smp_mb(); /* Synchronize with setting do_work */
+    raise_softirq(SCHEDULE_SOFTIRQ);
+}
+
+static int xsplice_do_wait(atomic_t *counter, s_time_t timeout,
+                           unsigned int total_cpus, const char *s)
+{
+    int rc = 0;
+
+    while ( atomic_read(counter) != total_cpus && NOW() < timeout )
+        cpu_relax();
+
+    /* Log & abort. */
+    if ( atomic_read(counter) != total_cpus )
+    {
+        printk(XENLOG_DEBUG "%s: %s %u/%u\n", xsplice_work.data->name,
+               s, atomic_read(counter), total_cpus);
+        rc = -EBUSY;
+        xsplice_work.data->rc = rc;
+        xsplice_work.do_work = 0;
+        smp_wmb();
+        return rc;
+    }
+    return rc;
+}
+
+static void xsplice_do_single(unsigned int total_cpus)
+{
+    nmi_callback_t saved_nmi_callback;
+    s_time_t timeout;
+    struct payload *data, *tmp;
+    int rc;
+
+    data = xsplice_work.data;
+    timeout = data->timeout ? data->timeout : MILLISECS(30);
+    printk(XENLOG_DEBUG "%s: timeout is %"PRI_stime"ms\n", data->name,
+           timeout / MILLISECS(1));
+
+    timeout += NOW();
+
+    if ( xsplice_do_wait(&xsplice_work.semaphore, timeout, total_cpus,
+                         "Timed out on CPU semaphore") )
+        return;
+
+    /* "Mask" NMIs. */
+    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
+
+    /* All CPUs are waiting, now signal to disable IRQs. */
+    xsplice_work.ready = 1;
+    smp_wmb();
+
+    atomic_inc(&xsplice_work.irq_semaphore);
+    if ( xsplice_do_wait(&xsplice_work.irq_semaphore, timeout, total_cpus,
+                         "Timed out on IRQ semaphore.") )
+        return;
+
+    local_irq_disable();
+    /* Now this function should be the only one on any stack.
+     * No need to lock the payload list or applied list. */
+    switch ( xsplice_work.cmd )
+    {
+    case XSPLICE_ACTION_APPLY:
+        rc = apply_payload(data);
+        if ( rc == 0 )
+            data->state = XSPLICE_STATE_APPLIED;
+        break;
+    case XSPLICE_ACTION_REVERT:
+        rc = revert_payload(data);
+        if ( rc == 0 )
+            data->state = XSPLICE_STATE_CHECKED;
+        break;
+    case XSPLICE_ACTION_REPLACE:
+        list_for_each_entry_safe_reverse ( data, tmp, &applied_list, list )
+        {
+            data->rc = revert_payload(data);
+            if ( data->rc == 0 )
+                data->state = XSPLICE_STATE_CHECKED;
+            else
+            {
+                rc = -EINVAL;
+                break;
+            }
+        }
+        if ( rc != -EINVAL )
+        {
+            rc = apply_payload(xsplice_work.data);
+            if ( rc == 0 )
+                xsplice_work.data->state = XSPLICE_STATE_APPLIED;
+        }
+        break;
+    default:
+        rc = -EINVAL;
+        break;
+    }
+
+    xsplice_work.data->rc = rc;
+
+    local_irq_enable();
+    set_nmi_callback(saved_nmi_callback);
+
+    xsplice_work.do_work = 0;
+    smp_wmb(); /* Synchronize with waiting CPUs. */
+}
+
+/*
+ * The main function which manages the work of quiescing the system and
+ * patching code.
+ */
+void do_xsplice(void)
+{
+    struct payload *p = xsplice_work.data;
+    unsigned int cpu = smp_processor_id();
+
+    /* Fast path: no work to do. */
+    if ( likely(!xsplice_work.do_work) )
+        return;
+
+    ASSERT(local_irq_is_enabled());
+
+    /* Set at -1, so will go up to num_online_cpus - 1 */
+    if ( atomic_inc_and_test(&xsplice_work.semaphore) )
+    {
+        unsigned int total_cpus;
+
+        if ( !get_cpu_maps() )
+        {
+            printk(XENLOG_DEBUG "%s: CPU%u - unable to get cpu_maps lock.\n",
+                   p->name, cpu);
+            xsplice_work.data->rc = -EBUSY;
+            xsplice_work.do_work = 0;
+            return;
+        }
+
+        barrier(); /* MUST do it after get_cpu_maps. */
+        total_cpus = num_online_cpus() - 1;
+
+        if ( total_cpus )
+        {
+            printk(XENLOG_DEBUG "%s: CPU%u - IPIing the %u CPUs.\n", p->name,
+                   cpu, total_cpus);
+            smp_call_function(reschedule_fn, NULL, 0);
+        }
+        (void)xsplice_do_single(total_cpus);
+
+        ASSERT(local_irq_is_enabled());
+
+        put_cpu_maps();
+
+        printk(XENLOG_DEBUG "%s finished with rc=%d\n", p->name, p->rc);
+    }
+    else
+    {
+        /* Wait for all CPUs to rendezvous. */
+        while ( xsplice_work.do_work && !xsplice_work.ready )
+        {
+            cpu_relax();
+            smp_rmb();
+        }
+
+        /* Disable IRQs and signal. */
+        local_irq_disable();
+        atomic_inc(&xsplice_work.irq_semaphore);
+
+        /* Wait for patching to complete. */
+        while ( xsplice_work.do_work )
+        {
+            cpu_relax();
+            smp_rmb();
+        }
+        local_irq_enable();
+    }
+}
+
 static int __init xsplice_init(void)
 {
     register_keyhandler('x', xsplice_printall, "print xsplicing info", 1);
diff --git a/xen/include/asm-arm/nmi.h b/xen/include/asm-arm/nmi.h
index a60587e..82aff35 100644
--- a/xen/include/asm-arm/nmi.h
+++ b/xen/include/asm-arm/nmi.h
@@ -4,6 +4,19 @@ 
 #define register_guest_nmi_callback(a)  (-ENOSYS)
 #define unregister_guest_nmi_callback() (-ENOSYS)
 
+typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
+
+/**
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set. Return the old nmi callback handler.
+ */
+static inline nmi_callback_t set_nmi_callback(nmi_callback_t callback)
+{
+    return NULL;
+}
+
 #endif /* ASM_NMI_H */
 /*
  * Local variables:
diff --git a/xen/include/xen/xsplice.h b/xen/include/xen/xsplice.h
index b90742f..61a9ec6 100644
--- a/xen/include/xen/xsplice.h
+++ b/xen/include/xen/xsplice.h
@@ -6,8 +6,30 @@  struct xsplice_elf_sec;
 struct xsplice_elf_sym;
 struct xen_sysctl_xsplice_op;
 
+/*
+ * The structure which defines the patching. This is what the hypervisor
+ * expects in the '.xsplice.func' section of the ELF file.
+ *
+ * This MUST be in sync with what the tools generate.
+ */
+struct xsplice_patch_func {
+    const char *name;
+    unsigned long new_addr;
+    const unsigned long old_addr;
+    uint32_t new_size;
+    const uint32_t old_size;
+    uint8_t undo[8];
+    uint8_t pad[24];
+};
+
 int xsplice_control(struct xen_sysctl_xsplice_op *);
 
+#ifdef CONFIG_XSPLICE
+void do_xsplice(void);
+#else
+static inline void do_xsplice(void) { };
+#endif
+
 /* Arch hooks */
 int xsplice_verify_elf(struct xsplice_elf *elf, uint8_t *data);
 int xsplice_perform_rel(struct xsplice_elf *elf,
@@ -16,4 +38,6 @@  int xsplice_perform_rel(struct xsplice_elf *elf,
 int xsplice_perform_rela(struct xsplice_elf *elf,
                          struct xsplice_elf_sec *base,
                          struct xsplice_elf_sec *rela);
+void xsplice_apply_jmp(struct xsplice_patch_func *func);
+void xsplice_revert_jmp(struct xsplice_patch_func *func);
 #endif /* __XEN_XSPLICE_H__ */