diff mbox

[v3,01/23] xen/xsplice: Hypervisor implementation of XEN_XSPLICE_op (v10)

Message ID 1455300361-13092-2-git-send-email-konrad.wilk@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Konrad Rzeszutek Wilk Feb. 12, 2016, 6:05 p.m. UTC
The implementation does not actually do any patching.

It just adds the framework for doing the hypercalls,
keeping track of ELF payloads, and the basic operations:
 - query which payloads exist,
 - query for specific payloads,
 - check*1, apply*1, replace*1, and unload payloads.

*1: Which of course in this patch are nops.

Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>

---
v2: Rebased on keyhandler: rework keyhandler infrastructure
v3: Fixed XSM.
v4: Removed REVERTED state.
    Split status and error code.
    Add REPLACE action.
    Separate payload data from the payload structure.
    s/XSPLICE_ID_../XSPLICE_NAME_../
v5: Add xsplice and CONFIG_XSPLICE build option.
    Fix code per Jan's review.
    Update the sysctl.h (change bits to enum like)
v6: Rebase on Kconfig changes.
v7: Add missing pad checks. Re-order keyhandler.h to build on ARM.
v8: Rebase on build: hook the schedulers into Kconfig
v9: s/id/name/
    s/payload_list_lock/payload_lock/
v10: Put #ifdef CONFIG_XSPLICE in header file.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 tools/flask/policy/policy/modules/xen/xen.te |   1 +
 xen/common/Kconfig                           |  10 +
 xen/common/Makefile                          |   2 +
 xen/common/sysctl.c                          |   7 +
 xen/common/xsplice.c                         | 386 +++++++++++++++++++++++++++
 xen/include/public/sysctl.h                  | 156 +++++++++++
 xen/include/xen/xsplice.h                    |  15 ++
 xen/xsm/flask/hooks.c                        |   6 +
 xen/xsm/flask/policy/access_vectors          |   2 +
 9 files changed, 585 insertions(+)
 create mode 100644 xen/common/xsplice.c
 create mode 100644 xen/include/xen/xsplice.h

Comments

Andrew Cooper Feb. 12, 2016, 8:11 p.m. UTC | #1
On 12/02/16 18:05, Konrad Rzeszutek Wilk wrote:
> diff --git a/xen/common/Kconfig b/xen/common/Kconfig
> index 6f404b4..619aa9e 100644
> --- a/xen/common/Kconfig
> +++ b/xen/common/Kconfig
> @@ -152,4 +152,14 @@ config SCHED_DEFAULT
>  
>  endmenu
>  
> +# Enable/Disable xsplice support
> +config XSPLICE
> +	bool "xsplice support"

"XSplice live patching support" ?

> +	default y
> +	---help---
> +	  Allows a running Xen hypervisor to be patched without rebooting.
> +	  This is primarily used to patch an hypervisor with XSA fixes.

Somewhere in here should use the terms "dynamic" and "binary patching",
to better describe its method of operation.

> +
> +	  If unsure, say Y.
> +
>  endmenu
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index 6e82b33..43b3911 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -72,3 +72,5 @@ subdir-$(coverage) += gcov
>  
>  subdir-y += libelf
>  subdir-$(CONFIG_HAS_DEVICE_TREE) += libfdt
> +
> +obj-$(CONFIG_XSPLICE) += xsplice.o

Should be part of the main obj- selection higher up.

> diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
> index 1624024..68e3eb4 100644
> --- a/xen/common/sysctl.c
> +++ b/xen/common/sysctl.c
> @@ -28,6 +28,7 @@
>  #include <xsm/xsm.h>
>  #include <xen/pmstat.h>
>  #include <xen/gcov.h>
> +#include <xen/xsplice.h>
>  
>  long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
>  {
> @@ -460,6 +461,12 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
>          ret = tmem_control(&op->u.tmem_op);
>          break;
>  
> +    case XEN_SYSCTL_xsplice_op:
> +        ret = xsplice_control(&op->u.xsplice);

Could we name this do_xsplice_op() to match prevailing subop style.

> +        if ( ret != -ENOSYS )
> +            copyback = 1;
> +        break;
> +

Not related to this patch.  I (and by this, I mean someone with time ;p)
should do some cleanup and pass copyback by pointer to subops.  This
allows for finer grain control of whether a copyback is needed.

> +static const char *state2str(int32_t state)
> +{
> +#define STATE(x) [XSPLICE_STATE_##x] = #x
> +    static const char *const names[] = {
> +            STATE(LOADED),
> +            STATE(CHECKED),
> +            STATE(APPLIED),
> +    };
> +#undef STATE
> +
> +    if (state >= ARRAY_SIZE(names))
> +        return "unknown";
> +
> +    if (state < 0)
> +        return "-EXX";
> +
> +    if (!names[state])
> +        return "unknown";

This could be folded into the ARRAY_SIZE() check.

> +
> +    return names[state];
> +}
> +
> +static void xsplice_printall(unsigned char key)
> +{
> +    struct payload *data;
> +
> +    spin_lock(&payload_lock);
> +
> +    list_for_each_entry ( data, &payload_list, list )
> +        printk(" name=%s state=%s(%d)\n", data->name,
> +               state2str(data->state), data->state);
> +
> +    spin_unlock(&payload_lock);
> +}
> +
> +static int verify_name(xen_xsplice_name_t *name)

const

> +{
> +    if ( name->size == 0 || name->size > XEN_XSPLICE_NAME_SIZE )
> +        return -EINVAL;
> +
> +    if ( name->pad[0] || name->pad[1] || name->pad[2] )
> +        return -EINVAL;
> +
> +    if ( !guest_handle_okay(name->name, name->size) )
> +        return -EINVAL;
> +
> +    return 0;
> +}
> +
> +static int find_payload(xen_xsplice_name_t *name, bool_t need_lock,
> +                        struct payload **f)
> +{
> +    struct payload *data;
> +    XEN_GUEST_HANDLE_PARAM(char) str;
> +    char n[XEN_XSPLICE_NAME_SIZE + 1] = { 0 };
> +    int rc = -EINVAL;
> +
> +    rc = verify_name(name);
> +    if ( rc )
> +        return rc;
> +
> +    str = guest_handle_cast(name->name, char);
> +    if ( copy_from_guest(n, str, name->size) )
> +        return -EFAULT;
> +
> +    if ( need_lock )
> +        spin_lock(&payload_lock);

What is the usecase where the lock shouldn't be taken?

[Edit]  From below, it's clear that this should be a recursive spinlock.

> +
> +    rc = -ENOENT;
> +    list_for_each_entry ( data, &payload_list, list )
> +    {
> +        if ( !strcmp(data->name, n) )
> +        {
> +            *f = data;
> +            rc = 0;
> +            break;
> +        }
> +    }
> +
> +    if ( need_lock )
> +        spin_unlock(&payload_lock);
> +
> +    return rc;
> +}
> +
> +static int verify_payload(xen_sysctl_xsplice_upload_t *upload)

const

> +{
> +    if ( verify_name(&upload->name) )
> +        return -EINVAL;
> +
> +    if ( upload->size == 0 )
> +        return -EINVAL;
> +
> +    if ( !guest_handle_okay(upload->payload, upload->size) )
> +        return -EFAULT;
> +
> +    return 0;
> +}
> +
> +/*
> + * We MUST be holding the payload_lock spinlock.

In which case ASSERT(spin_is_locked())

> + */
> +static void free_payload(struct payload *data)
> +{
> +    list_del(&data->list);
> +    payload_cnt--;
> +    payload_version++;
> +    xfree(data);
> +}
> +
> +static int xsplice_upload(xen_sysctl_xsplice_upload_t *upload)
> +{
> +    struct payload *data = NULL;
> +    uint8_t *raw_data;
> +    int rc;
> +
> +    rc = verify_payload(upload);
> +    if ( rc )
> +        return rc;
> +
> +    rc = find_payload(&upload->name, 1 /* true. */, &data);
> +    if ( rc == 0 /* Found. */ )
> +        return -EEXIST;
> +
> +    if ( rc != -ENOENT )
> +        return rc;
> +
> +    data = xzalloc(struct payload);
> +    if ( !data )
> +        return -ENOMEM;
> +
> +    memset(data, 0, sizeof *data);

xzalloc() has already zeroed data for you.

> +    rc = -EFAULT;
> +    if ( copy_from_guest(data->name, upload->name.name, upload->name.size) )
> +        goto err_data;
> +
> +    rc = -ENOMEM;
> +    raw_data = alloc_xenheap_pages(get_order_from_bytes(upload->size), 0);

Better to use valloc(), as it won't fail given lots of memory fragmentation.

> +    if ( !raw_data )
> +        goto err_data;
> +
> +    rc = -EFAULT;
> +    if ( copy_from_guest(raw_data, upload->payload, upload->size) )
> +        goto err_raw;
> +
> +    data->state = XSPLICE_STATE_LOADED;
> +    data->rc = 0;
> +    INIT_LIST_HEAD(&data->list);
> +
> +    spin_lock(&payload_lock);
> +    list_add_tail(&data->list, &payload_list);
> +    payload_cnt++;
> +    payload_version++;
> +    spin_unlock(&payload_lock);
> +
> +    free_xenheap_pages(raw_data, get_order_from_bytes(upload->size));
> +    return 0;
> +
> + err_raw:
> +    free_xenheap_pages(raw_data, get_order_from_bytes(upload->size));
> + err_data:
> +    xfree(data);

It would be cleaner to combine these two err labels into a single err
path.  Both free() functions behave sensibly with NULL pointers.

> +    return rc;
> +}
> +
> +static int xsplice_get(xen_sysctl_xsplice_summary_t *summary)
> +{
> +    struct payload *data;
> +    int rc;
> +
> +    if ( summary->status.state )
> +        return -EINVAL;
> +
> +    if ( summary->status.rc != 0 )
> +        return -EINVAL;
> +
> +    rc = verify_name(&summary->name);
> +    if ( rc )
> +        return rc;
> +
> +    rc = find_payload(&summary->name, 1 /* true. */, &data);
> +    if ( rc )
> +        return rc;
> +
> +    summary->status.state = data->state;
> +    summary->status.rc = data->rc;
> +
> +    return 0;
> +}
> +
> +static int xsplice_list(xen_sysctl_xsplice_list_t *list)
> +{
> +    xen_xsplice_status_t status;
> +    struct payload *data;
> +    unsigned int idx = 0, i = 0;
> +    int rc = 0;
> +
> +    if ( list->nr > 1024 )
> +        return -E2BIG;
> +
> +    if ( list->pad != 0 )
> +        return -EINVAL;
> +
> +    if ( !guest_handle_okay(list->status, sizeof(status) * list->nr) ||
> +         !guest_handle_okay(list->name, XEN_XSPLICE_NAME_SIZE * list->nr) ||
> +         !guest_handle_okay(list->len, sizeof(uint32_t) * list->nr) )
> +        return -EINVAL;
> +
> +    spin_lock(&payload_lock);
> +    if ( list->idx > payload_cnt || !list->nr )
> +    {
> +        spin_unlock(&payload_lock);
> +        return -EINVAL;
> +    }
> +
> +    list_for_each_entry( data, &payload_list, list )
> +    {
> +        uint32_t len;
> +
> +        if ( list->idx > i++ )
> +            continue;
> +
> +        status.state = data->state;
> +        status.rc = data->rc;
> +        len = strlen(data->name);
> +
> +        /* N.B. 'idx' != 'i'. */
> +        if ( __copy_to_guest_offset(list->name, idx * XEN_XSPLICE_NAME_SIZE,
> +                                    data->name, len) ||
> +             __copy_to_guest_offset(list->len, idx, &len, 1) ||
> +             __copy_to_guest_offset(list->status, idx, &status, 1) )
> +        {
> +            rc = -EFAULT;
> +            break;
> +        }
> +        idx++;

Some extra newlines around here please.

> +        if ( hypercall_preempt_check() || (idx + 1 > list->nr) )
> +            break;
> +    }
> +    list->nr = payload_cnt - i; /* Remaining amount. */
> +    list->version = payload_version;
> +    spin_unlock(&payload_lock);
> +
> +    /* And how many we have processed. */
> +    return rc ? : idx;
> +}
> +
> +static int xsplice_action(xen_sysctl_xsplice_action_t *action)
> +{
> +    struct payload *data;
> +    int rc;
> +
> +    rc = verify_name(&action->name);
> +    if ( rc )
> +        return rc;
> +
> +    spin_lock(&payload_lock);
> +    rc = find_payload(&action->name, 0 /* We are holding the lock. */, &data);

Looks like payload_lock should be a recursive lock.  Please do that,
rather than risk accessing locked data without the lock held at all.

> +    if ( rc )
> +        goto out;
> +
> +    switch ( action->cmd )
> +    {
> +    case XSPLICE_ACTION_CHECK:
> +        if ( (data->state == XSPLICE_STATE_LOADED) ||
> +             (data->state == XSPLICE_STATE_CHECKED) )
> +        {
> +            /* No implementation yet. */
> +            data->state = XSPLICE_STATE_CHECKED;
> +            data->rc = 0;
> +            rc = 0;
> +        }
> +        break;

Newlines between break and case statements please.

> +    case XSPLICE_ACTION_UNLOAD:
> +        if ( (data->state == XSPLICE_STATE_LOADED) ||
> +             (data->state == XSPLICE_STATE_CHECKED) )
> +        {
> +            free_payload(data);
> +            /* No touching 'data' from here on! */
> +            rc = 0;
> +        }
> +        break;
> +    case XSPLICE_ACTION_REVERT:
> +        if ( data->state == XSPLICE_STATE_APPLIED )
> +        {
> +            /* No implementation yet. */
> +            data->state = XSPLICE_STATE_CHECKED;
> +            data->rc = 0;
> +            rc = 0;
> +        }
> +        break;
> +    case XSPLICE_ACTION_APPLY:
> +        if ( (data->state == XSPLICE_STATE_CHECKED) )
> +        {
> +            /* No implementation yet. */
> +            data->state = XSPLICE_STATE_APPLIED;
> +            data->rc = 0;
> +            rc = 0;
> +        }
> +        break;
> +    case XSPLICE_ACTION_REPLACE:
> +        if ( data->state == XSPLICE_STATE_CHECKED )
> +        {
> +            /* No implementation yet. */
> +            data->state = XSPLICE_STATE_CHECKED;
> +            data->rc = 0;
> +            rc = 0;
> +        }
> +        break;
> +    default:
> +        rc = -EOPNOTSUPP;
> +        break;
> +    }
> +
> + out:
> +    spin_unlock(&payload_lock);
> +
> +    return rc;
> +}
> +
> +int xsplice_control(xen_sysctl_xsplice_op_t *xsplice)
> +{
> +    int rc;
> +
> +    if ( xsplice->pad != 0 )
> +        return -EINVAL;
> +
> +    switch ( xsplice->cmd )
> +    {
> +    case XEN_SYSCTL_XSPLICE_UPLOAD:
> +        rc = xsplice_upload(&xsplice->u.upload);
> +        break;

Newlines for these as well please.

> +    case XEN_SYSCTL_XSPLICE_GET:
> +        rc = xsplice_get(&xsplice->u.get);
> +        break;
> +    case XEN_SYSCTL_XSPLICE_LIST:
> +        rc = xsplice_list(&xsplice->u.list);
> +        break;
> +    case XEN_SYSCTL_XSPLICE_ACTION:
> +        rc = xsplice_action(&xsplice->u.action);
> +        break;
> +    default:
> +        rc = -EOPNOTSUPP;
> +        break;
> +   }
> +
> +    return rc;
> +}
> +
> +static int __init xsplice_init(void)
> +{
> +    register_keyhandler('x', xsplice_printall, "print xsplicing info", 1);
> +    return 0;
> +}
> +__initcall(xsplice_init);

Local variable block please.

> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 96680eb..d549e7a 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -766,6 +766,160 @@ struct xen_sysctl_tmem_op {
>  typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
>  
> +/*
> + * XEN_SYSCTL_XSPLICE_op
> + *
> + * Refer to the http://xenbits.xenproject.org/docs/unstable/misc/xsplice.html

I would refer to the file in the source tree, so docs/misc/xsplice.$FOO
which is far less likely to change.

> + * for the design details of this hyprcall.
> + */
> +
> +/*
> + * Structure describing an ELF payload. Uniquely identifies the
> + * payload. Should be human readable.
> + * Recommended length is upto XEN_XSPLICE_NAME_SIZE.
> + */
> +#define XEN_XSPLICE_NAME_SIZE 128
> +struct xen_xsplice_name {
> +    XEN_GUEST_HANDLE_64(char) name;         /* IN: pointer to name. */
> +    uint16_t size;                          /* IN: size of name. May be upto
> +                                               XEN_XSPLICE_NAME_SIZE. */
> +    uint16_t pad[3];                        /* IN: MUST be zero. */
> +};
> +typedef struct xen_xsplice_name xen_xsplice_name_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_xsplice_name_t);
> +
> +/*
> + * Upload a payload to the hypervisor. The payload is verified
> + * against basic checks and if there are any issues the proper return code
> + * will be returned. The payload is not applied at this time - that is
> + * controlled by XEN_SYSCTL_XSPLICE_ACTION.
> + *
> + * The return value is zero if the payload was succesfully uploaded.
> + * Otherwise an EXX return value is provided. Duplicate `name` are not
> + * supported.

I would recommend having a full state diagram in the xsplice
documentation and referring to that, rather than having half a "but not
this yet" set of comments in the header file.

> + *
> + * The payload at this point is verified against the basic checks.
> + *
> + * The `payload` is the ELF payload as mentioned in the `Payload format`
> + * section in the xSplice design document.
> + */
> +#define XEN_SYSCTL_XSPLICE_UPLOAD 0
> +struct xen_sysctl_xsplice_upload {
> +    xen_xsplice_name_t name;                /* IN, name of the patch. */
> +    uint64_t size;                          /* IN, size of the ELF file. */
> +    XEN_GUEST_HANDLE_64(uint8) payload;     /* IN, the ELF file. */
> +};
> +typedef struct xen_sysctl_xsplice_upload xen_sysctl_xsplice_upload_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_upload_t);
> +
> +/*
> + * Retrieve an status of an specific payload.
> + *
> + * Upon completion the `struct xen_xsplice_status` is updated.
> + *
> + * The return value is zero on success and XEN_EXX on failure. This operation
> + * is synchronous and does not require preemption.
> + */
> +#define XEN_SYSCTL_XSPLICE_GET 1
> +
> +struct xen_xsplice_status {
> +#define XSPLICE_STATE_LOADED       1
> +#define XSPLICE_STATE_CHECKED      2
> +#define XSPLICE_STATE_APPLIED      3
> +    int32_t state;                 /* OUT: XSPLICE_STATE_*. IN: MUST be zero. */
> +    int32_t rc;                    /* OUT: 0 if no error, otherwise -XEN_EXX. */
> +                                   /* IN: MUST be zero. */
> +};
> +typedef struct xen_xsplice_status xen_xsplice_status_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_xsplice_status_t);
> +
> +struct xen_sysctl_xsplice_summary {
> +    xen_xsplice_name_t name;                /* IN, name of the payload. */
> +    xen_xsplice_status_t status;            /* IN/OUT, state of it. */
> +};
> +typedef struct xen_sysctl_xsplice_summary xen_sysctl_xsplice_summary_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_summary_t);
> +
> +/*
> + * Retrieve an array of abbreviated status and names of payloads that are
> + * loaded in the hypervisor.
> + *
> + * If the hypercall returns an positive number, it is the number (up to `nr`)
> + * of the payloads returned, along with `nr` updated with the number of remaining
> + * payloads, `version` updated (it may be the same across hypercalls. If it
> + * varies the data is stale and further calls could fail). The `status`,
> + * `name`, and `len`' are updated at their designed index value (`idx`) with
> + * the returned value of data.
> + *
> + * If the hypercall returns E2BIG the `nr` is too big and should be
> + * lowered.

What would cause this situation to occur?

> + *
> + * This operation can be preempted by the hypercall returning EAGAIN.
> + * Retry.

Again, why is this necessary or useful?

> + *
> + * Note that due to the asynchronous nature of hypercalls the domain might have
> + * added or removed the number of payloads making this information stale. It is
> + * the responsibility of the toolstack to use the `version` field to check
> + * between each invocation. if the version differs it should discard the stale
> + * data and start from scratch. It is OK for the toolstack to use the new
> + * `version` field.
> + */
> +#define XEN_SYSCTL_XSPLICE_LIST 2
> +struct xen_sysctl_xsplice_list {
> +    uint32_t version;                       /* IN/OUT: Initially *MUST* be zero.
> +                                               On subsequent calls reuse value.
> +                                               If varies between calls, we are
> +                                             * getting stale data. */
> +    uint32_t idx;                           /* IN/OUT: Index into array. */
> +    uint32_t nr;                            /* IN: How many status, name, and len
> +                                               should fill out.
> +                                               OUT: How many payloads left. */
> +    uint32_t pad;                           /* IN: Must be zero. */
> +    XEN_GUEST_HANDLE_64(xen_xsplice_status_t) status;  /* OUT. Must have enough
> +                                               space allocate for nr of them. */
> +    XEN_GUEST_HANDLE_64(char) name;         /* OUT: Array of names. Each member
> +                                               MUST XEN_XSPLICE_NAME_SIZE in size.
> +                                               Must have nr of them. */
> +    XEN_GUEST_HANDLE_64(uint32) len;        /* OUT: Array of lengths of name's.
> +                                               Must have nr of them. */
> +};
> +typedef struct xen_sysctl_xsplice_list xen_sysctl_xsplice_list_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_list_t);
> +
> +/*
> + * Perform an operation on the payload structure referenced by the `name` field.
> + * The operation request is asynchronous and the status should be retrieved
> + * by using either XEN_SYSCTL_XSPLICE_GET or XEN_SYSCTL_XSPLICE_LIST hypercall.
> + */
> +#define XEN_SYSCTL_XSPLICE_ACTION 3
> +struct xen_sysctl_xsplice_action {
> +    xen_xsplice_name_t name;                /* IN, name of the patch. */
> +#define XSPLICE_ACTION_CHECK        1
> +#define XSPLICE_ACTION_UNLOAD       2
> +#define XSPLICE_ACTION_REVERT       3
> +#define XSPLICE_ACTION_APPLY        4
> +#define XSPLICE_ACTION_REPLACE      5
> +    uint32_t cmd;                           /* IN: XSPLICE_ACTION_*. */
> +    uint32_t timeout;                       /* IN: Zero if no timeout. */
> +                                            /* Or upper bound of time (ms) */
> +                                            /* for operation to take. */
> +};
> +typedef struct xen_sysctl_xsplice_action xen_sysctl_xsplice_action_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_action_t);
> +
> +struct xen_sysctl_xsplice_op {
> +    uint32_t cmd;                           /* IN: XEN_SYSCTL_XSPLICE_*. */
> +    uint32_t pad;                           /* IN: Always zero. */
> +    union {
> +        xen_sysctl_xsplice_upload_t upload;
> +        xen_sysctl_xsplice_list_t list;
> +        xen_sysctl_xsplice_summary_t get;
> +        xen_sysctl_xsplice_action_t action;
> +    } u;
> +};
> +typedef struct xen_sysctl_xsplice_op xen_sysctl_xsplice_op_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_op_t);
> +
>  struct xen_sysctl {
>      uint32_t cmd;
>  #define XEN_SYSCTL_readconsole                    1
> @@ -791,6 +945,7 @@ struct xen_sysctl {
>  #define XEN_SYSCTL_pcitopoinfo                   22
>  #define XEN_SYSCTL_psr_cat_op                    23
>  #define XEN_SYSCTL_tmem_op                       24
> +#define XEN_SYSCTL_xsplice_op                    25
>      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
>      union {
>          struct xen_sysctl_readconsole       readconsole;
> @@ -816,6 +971,7 @@ struct xen_sysctl {
>          struct xen_sysctl_psr_cmt_op        psr_cmt_op;
>          struct xen_sysctl_psr_cat_op        psr_cat_op;
>          struct xen_sysctl_tmem_op           tmem_op;
> +        struct xen_sysctl_xsplice_op        xsplice;
>          uint8_t                             pad[128];
>      } u;
>  };
> diff --git a/xen/include/xen/xsplice.h b/xen/include/xen/xsplice.h
> new file mode 100644
> index 0000000..cf465c4
> --- /dev/null
> +++ b/xen/include/xen/xsplice.h
> @@ -0,0 +1,15 @@
> +#ifndef __XEN_XSPLICE_H__
> +#define __XEN_XSPLICE_H__
> +
> +struct xen_sysctl_xsplice_op;
> +
> +#ifdef CONFIG_XSPLICE

No reason for this all to be squashed completely together.

> +int xsplice_control(struct xen_sysctl_xsplice_op *);
> +#else
> +#include <xen/errno.h> /* For -ENOSYS */
> +static inline int xsplice_control(struct xen_sysctl_xsplice_op *op)
> +{
> +    return -ENOSYS;
> +}
> +#endif
> +#endif /* __XEN_XSPLICE_H__ */

Variable block please.

~Andrew
Konrad Rzeszutek Wilk Feb. 12, 2016, 8:40 p.m. UTC | #2
> > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> > index 96680eb..d549e7a 100644
> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -766,6 +766,160 @@ struct xen_sysctl_tmem_op {
> >  typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
> >  
> > +/*
> > + * XEN_SYSCTL_XSPLICE_op
> > + *
> > + * Refer to the http://xenbits.xenproject.org/docs/unstable/misc/xsplice.html
> 
> I would refer to the file in the source tree, so docs/misc/xsplice.$FOO
> which is far less likely to change.

The initial patch had exactly that -  but Jan asked me to change it to the
URL. Shall I include both of them?

.. snip..
> > + * Retrieve an array of abbreviated status and names of payloads that are
> > + * loaded in the hypervisor.
> > + *
> > + * If the hypercall returns an positive number, it is the number (up to `nr`)
> > + * of the payloads returned, along with `nr` updated with the number of remaining
> > + * payloads, `version` updated (it may be the same across hypercalls. If it
> > + * varies the data is stale and further calls could fail). The `status`,
> > + * `name`, and `len`' are updated at their designed index value (`idx`) with
> > + * the returned value of data.
> > + *
> > + * If the hypercall returns E2BIG the `nr` is too big and should be
> > + * lowered.
> 
> What would cause this situation to occur?

If the hypervisor decided that the 'nr' is too big. It is hardcoded to a value - but
I don't think it makes sense to mention that in the header file.
> 
> > + *
> > + * This operation can be preempted by the hypercall returning EAGAIN.
> > + * Retry.
> 
> Again, why is this necessary or useful?

Actually it is a lie. I've updated the design document but forgot to remove it here!

Thanks for the comments! Let me update the file..
Andrew Cooper Feb. 12, 2016, 8:53 p.m. UTC | #3
On 12/02/16 20:40, Konrad Rzeszutek Wilk wrote:
>>> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
>>> index 96680eb..d549e7a 100644
>>> --- a/xen/include/public/sysctl.h
>>> +++ b/xen/include/public/sysctl.h
>>> @@ -766,6 +766,160 @@ struct xen_sysctl_tmem_op {
>>>  typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
>>>  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
>>>  
>>> +/*
>>> + * XEN_SYSCTL_XSPLICE_op
>>> + *
>>> + * Refer to the http://xenbits.xenproject.org/docs/unstable/misc/xsplice.html
>> I would refer to the file in the source tree, so docs/misc/xsplice.$FOO
>> which is far less likely to change.
> The initial patch had exactly that -  but Jan asked me to change it to the
> URL. Shall I include both of them?

Ok then.  (most other docs references are relative to the source tree...)

~Andrew
Jan Beulich Feb. 15, 2016, 8:16 a.m. UTC | #4
>>> On 12.02.16 at 21:40, <konrad.wilk@oracle.com> wrote:
>> > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
>> > index 96680eb..d549e7a 100644
>> > --- a/xen/include/public/sysctl.h
>> > +++ b/xen/include/public/sysctl.h
>> > @@ -766,6 +766,160 @@ struct xen_sysctl_tmem_op {
>> >  typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
>> >  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
>> >  
>> > +/*
>> > + * XEN_SYSCTL_XSPLICE_op
>> > + *
>> > + * Refer to the 
> http://xenbits.xenproject.org/docs/unstable/misc/xsplice.html 
>> 
>> I would refer to the file in the source tree, so docs/misc/xsplice.$FOO
>> which is far less likely to change.
> 
> The initial patch had exactly that -  but Jan asked me to change it to the
> URL. Shall I include both of them?

Well, I have to admit that I don't recall, and don't see why I would
have. I agree with Andrew that an in-tree reference would be
better. Maybe I said this neglecting that the (supposedly) first
patch puts the respective doc in place...

Jan
Konrad Rzeszutek Wilk Feb. 19, 2016, 7:36 p.m. UTC | #5
> >  long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
> >  {
> > @@ -460,6 +461,12 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
> >          ret = tmem_control(&op->u.tmem_op);
> >          break;
> >  
> > +    case XEN_SYSCTL_xsplice_op:
> > +        ret = xsplice_control(&op->u.xsplice);
> 
> Could we name this do_xsplice_op() to match prevailing subop style.

There are two instances of that: do_get_pm_info, do_pm_op.

Then variations of 'do' are: cpupool_do_sysctl, arch_do_physinfo, and
arch_do_sysctl.

And then ones enjoying 'op' in it:
sysctl_coverage_op

And then 'control' ones:
spinlock_profile_control, tmem_control, perfc_control, tb_control.

So we have 2 vs 3 vs 1 vs 4.

I would say that the name 'xsplice_control' is the prevailing style?

Unless you want me to take a union of them, perhaps:

 do_xsplice_control_op ?

<chuckles>

I will change it to what you prefer - do_xsplice_op.
> 
> > +        if ( ret != -ENOSYS )
> > +            copyback = 1;
> > +        break;
> > +
> 
> Not related to this patch.  I (and by this, I mean someone with time ;p)
> should do some cleanup and pass copyback by pointer to subops.  This
> allows for finer grain control of whether a copyback is needed.

Yes indeed. But then how often do you do sysctl hypercalls?
Andrew Cooper Feb. 19, 2016, 7:43 p.m. UTC | #6
On 19/02/2016 19:36, Konrad Rzeszutek Wilk wrote:
>>>  long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
>>>  {
>>> @@ -460,6 +461,12 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
>>>          ret = tmem_control(&op->u.tmem_op);
>>>          break;
>>>  
>>> +    case XEN_SYSCTL_xsplice_op:
>>> +        ret = xsplice_control(&op->u.xsplice);
>> Could we name this do_xsplice_op() to match prevailing subop style.
> There are two instances of that: do_get_pm_info, do_pm_op.
>
> Then variations of 'do' are: cpupool_do_sysctl, arch_do_physinfo, and
> arch_do_sysctl.
>
> And then ones enjoying 'op' in it:
> sysctl_coverage_op
>
> And then 'control' ones:
> spinlock_profile_control, tmem_control, perfc_control, tb_control.
>
> So we have 2 vs 3 vs 1 vs 4.
>
> I would say that the name 'xsplice_control' is the prevailing style?
>
> Unless you want me to take a union of them, perhaps:
>
>  do_xsplice_control_op ?
>
> <chuckles>
>
> I will change it to what you prefer - do_xsplice_op.

The important bit (for logically associating different bits of code) is
to have the main stem matching the hypercall op name.  Simply
"xsplice_op()" would be ok, and could naturally be extended to
arch_xsplice_op() if the need arises.

>>> +        if ( ret != -ENOSYS )
>>> +            copyback = 1;
>>> +        break;
>>> +
>> Not related to this patch.  I (and by this, I mean someone with time ;p)
>> should do some cleanup and pass copyback by pointer to subops.  This
>> allows for finer grain control of whether a copyback is needed.
> Yes indeed. But then how often do you do sysctl hypercalls?

The purpose is for simplifying the in-hypervisor codepaths, rather than
performance.  (A side effect would be to reduce the size of the
alternatives table patching stac/clac instructions).

~Andrew
diff mbox

Patch

diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
index d35ae22..542c3e1 100644
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -72,6 +72,7 @@  allow dom0_t xen_t:xen2 {
 allow dom0_t xen_t:xen2 {
     pmu_ctrl
     get_symbol
+    xsplice_op
 };
 allow dom0_t xen_t:mmu memorymap;
 
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index 6f404b4..619aa9e 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -152,4 +152,14 @@  config SCHED_DEFAULT
 
 endmenu
 
+# Enable/Disable xsplice support
+config XSPLICE
+	bool "xsplice support"
+	default y
+	---help---
+	  Allows a running Xen hypervisor to be patched without rebooting.
+	  This is primarily used to patch an hypervisor with XSA fixes.
+
+	  If unsure, say Y.
+
 endmenu
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 6e82b33..43b3911 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -72,3 +72,5 @@  subdir-$(coverage) += gcov
 
 subdir-y += libelf
 subdir-$(CONFIG_HAS_DEVICE_TREE) += libfdt
+
+obj-$(CONFIG_XSPLICE) += xsplice.o
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 1624024..68e3eb4 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -28,6 +28,7 @@ 
 #include <xsm/xsm.h>
 #include <xen/pmstat.h>
 #include <xen/gcov.h>
+#include <xen/xsplice.h>
 
 long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
 {
@@ -460,6 +461,12 @@  long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
         ret = tmem_control(&op->u.tmem_op);
         break;
 
+    case XEN_SYSCTL_xsplice_op:
+        ret = xsplice_control(&op->u.xsplice);
+        if ( ret != -ENOSYS )
+            copyback = 1;
+        break;
+
     default:
         ret = arch_do_sysctl(op, u_sysctl);
         copyback = 0;
diff --git a/xen/common/xsplice.c b/xen/common/xsplice.c
new file mode 100644
index 0000000..125d9b8
--- /dev/null
+++ b/xen/common/xsplice.c
@@ -0,0 +1,386 @@ 
+/*
+ * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
+ *
+ */
+
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <xen/lib.h>
+#include <xen/list.h>
+#include <xen/mm.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/spinlock.h>
+#include <xen/xsplice.h>
+
+#include <asm/event.h>
+#include <public/sysctl.h>
+
+static DEFINE_SPINLOCK(payload_lock);
+static LIST_HEAD(payload_list);
+
+static unsigned int payload_cnt;
+static unsigned int payload_version = 1;
+
+/*
+ * Book-keeping for one uploaded payload. Instances are allocated by
+ * xsplice_upload(), linked on 'payload_list' and released by free_payload().
+ */
+struct payload {
+    int32_t state;                       /* One of the XSPLICE_STATE_*. */
+    int32_t rc;                          /* 0 or -XEN_EXX. */
+    struct list_head list;               /* Linked to 'payload_list'. */
+    char name[XEN_XSPLICE_NAME_SIZE + 1];/* Name of it. */
+};
+
+/*
+ * Translate a payload state into a printable name.
+ *
+ * Returns "-EXX" for negative values, the XSPLICE_STATE_* suffix for the
+ * states listed in the table below and "unknown" for everything else.
+ */
+static const char *state2str(int32_t state)
+{
+#define STATE(x) [XSPLICE_STATE_##x] = #x
+    static const char *const names[] = {
+            STATE(LOADED),
+            STATE(CHECKED),
+            STATE(APPLIED),
+    };
+#undef STATE
+
+    /*
+     * Test the sign first. ARRAY_SIZE() is sizeof based and therefore
+     * unsigned, so comparing a negative 'state' against it converts the
+     * value to a huge positive number and "-EXX" would never be reported.
+     */
+    if ( state < 0 )
+        return "-EXX";
+
+    /* Out of range, or a hole in the designated initializer table. */
+    if ( state >= ARRAY_SIZE(names) || !names[state] )
+        return "unknown";
+
+    return names[state];
+}
+
+/*
+ * Keyhandler callback (registered for the 'x' key by xsplice_init()): print
+ * the name and state of every payload on 'payload_list'. The list is walked
+ * with 'payload_lock' held. 'key' is not used.
+ */
+static void xsplice_printall(unsigned char key)
+{
+    struct payload *data;
+
+    spin_lock(&payload_lock);
+
+    list_for_each_entry ( data, &payload_list, list )
+        printk(" name=%s state=%s(%d)\n", data->name,
+               state2str(data->state), data->state);
+
+    spin_unlock(&payload_lock);
+}
+
+/*
+ * Validate a guest supplied payload name descriptor.
+ *
+ * The size must be between 1 and XEN_XSPLICE_NAME_SIZE, all padding must be
+ * zero and the name handle must pass guest_handle_okay() for 'size'
+ * characters.
+ *
+ * Returns 0 if the descriptor is acceptable, -EINVAL otherwise.
+ */
+static int verify_name(xen_xsplice_name_t *name)
+{
+    if ( !name->size || name->size > XEN_XSPLICE_NAME_SIZE ||
+         name->pad[0] || name->pad[1] || name->pad[2] ||
+         !guest_handle_okay(name->name, name->size) )
+        return -EINVAL;
+
+    return 0;
+}
+
+/*
+ * Look up a payload by its guest supplied name.
+ *
+ * The name descriptor is validated with verify_name() and the name itself
+ * copied into a zero initialised local buffer before 'payload_list' is
+ * searched with strcmp().
+ *
+ * If 'need_lock' is set, 'payload_lock' is taken around the list walk;
+ * otherwise the caller is expected to hold it already.
+ *
+ * Returns 0 and stores the match in '*f', -ENOENT if no payload has that
+ * name ('*f' is left untouched), -EFAULT if the name cannot be copied in,
+ * or the error from verify_name().
+ */
+static int find_payload(xen_xsplice_name_t *name, bool_t need_lock,
+                        struct payload **f)
+{
+    char wanted[XEN_XSPLICE_NAME_SIZE + 1] = { 0 };
+    XEN_GUEST_HANDLE_PARAM(char) hnd;
+    struct payload *entry;
+    int rc = verify_name(name);
+
+    if ( rc )
+        return rc;
+
+    hnd = guest_handle_cast(name->name, char);
+    if ( copy_from_guest(wanted, hnd, name->size) )
+        return -EFAULT;
+
+    if ( need_lock )
+        spin_lock(&payload_lock);
+
+    rc = -ENOENT;
+    list_for_each_entry ( entry, &payload_list, list )
+    {
+        if ( strcmp(entry->name, wanted) )
+            continue;
+
+        *f = entry;
+        rc = 0;
+        break;
+    }
+
+    if ( need_lock )
+        spin_unlock(&payload_lock);
+
+    return rc;
+}
+
+/*
+ * Validate an upload request before anything is copied in.
+ *
+ * Returns -EINVAL if the name descriptor is rejected by verify_name() or
+ * the payload size is zero, -EFAULT if the payload handle fails
+ * guest_handle_okay(), and 0 otherwise.
+ */
+static int verify_payload(xen_sysctl_xsplice_upload_t *upload)
+{
+    if ( verify_name(&upload->name) || !upload->size )
+        return -EINVAL;
+
+    return guest_handle_okay(upload->payload, upload->size) ? 0 : -EFAULT;
+}
+
+/*
+ * Unlink 'data' from 'payload_list' and free it, decrementing 'payload_cnt'
+ * and bumping 'payload_version' so that the change is visible to users of
+ * the list operation.
+ *
+ * We MUST be holding the payload_lock spinlock.
+ */
+static void free_payload(struct payload *data)
+{
+    list_del(&data->list);
+    payload_cnt--;
+    payload_version++;
+    xfree(data);
+}
+
+/*
+ * XEN_SYSCTL_XSPLICE_UPLOAD: register a new payload under upload->name.
+ *
+ * The ELF image is copied into a temporary buffer to prove that it can be
+ * read, but nothing consumes it yet, so the buffer is released again before
+ * returning. Only the book-keeping entry ends up on 'payload_list', in the
+ * XSPLICE_STATE_LOADED state.
+ *
+ * Returns 0 on success, -EEXIST if a payload of that name already exists,
+ * -ENOMEM or -EFAULT on allocation or copy failures, or the error from
+ * verify_payload() / find_payload().
+ */
+static int xsplice_upload(xen_sysctl_xsplice_upload_t *upload)
+{
+    struct payload *data = NULL;
+    uint8_t *raw_data;
+    int rc;
+
+    rc = verify_payload(upload);
+    if ( rc )
+        return rc;
+
+    rc = find_payload(&upload->name, 1 /* true. */, &data);
+    if ( rc == 0 /* Found. */ )
+        return -EEXIST;
+
+    if ( rc != -ENOENT )
+        return rc;
+
+    /*
+     * xzalloc() hands back zeroed memory, so no further clearing is needed
+     * and 'name' stays NUL terminated: at most XEN_XSPLICE_NAME_SIZE bytes
+     * are copied into a buffer that is one byte larger.
+     */
+    data = xzalloc(struct payload);
+    if ( !data )
+        return -ENOMEM;
+
+    rc = -EFAULT;
+    if ( copy_from_guest(data->name, upload->name.name, upload->name.size) )
+        goto err_data;
+
+    rc = -ENOMEM;
+    raw_data = alloc_xenheap_pages(get_order_from_bytes(upload->size), 0);
+    if ( !raw_data )
+        goto err_data;
+
+    rc = -EFAULT;
+    if ( copy_from_guest(raw_data, upload->payload, upload->size) )
+        goto err_raw;
+
+    data->state = XSPLICE_STATE_LOADED;
+    data->rc = 0;
+    INIT_LIST_HEAD(&data->list);
+
+    spin_lock(&payload_lock);
+    list_add_tail(&data->list, &payload_list);
+    payload_cnt++;
+    payload_version++;
+    spin_unlock(&payload_lock);
+
+    /* No consumer for the image yet - drop the temporary copy. */
+    free_xenheap_pages(raw_data, get_order_from_bytes(upload->size));
+    return 0;
+
+ err_raw:
+    free_xenheap_pages(raw_data, get_order_from_bytes(upload->size));
+ err_data:
+    xfree(data);
+    return rc;
+}
+
+/*
+ * XEN_SYSCTL_XSPLICE_GET: report state and return code of one payload.
+ *
+ * Both fields of summary->status must be zero on input; they are filled in
+ * from the payload on success.
+ *
+ * Returns 0 on success, -EINVAL for non-zero status fields, or the error
+ * from find_payload() (which includes the name validation).
+ */
+static int xsplice_get(xen_sysctl_xsplice_summary_t *summary)
+{
+    struct payload *data;
+    int rc;
+
+    if ( summary->status.state || summary->status.rc )
+        return -EINVAL;
+
+    /* find_payload() starts with verify_name(); no need to do it twice. */
+    rc = find_payload(&summary->name, 1 /* true. */, &data);
+    if ( rc )
+        return rc;
+
+    /*
+     * NOTE(review): find_payload() has released 'payload_lock' by now, so
+     * 'data' is read unlocked - presumably safe because callers are
+     * serialised elsewhere; confirm.
+     */
+    summary->status.state = data->state;
+    summary->status.rc = data->rc;
+
+    return 0;
+}
+
+/*
+ * XEN_SYSCTL_XSPLICE_LIST: copy out state, name and name length of the
+ * payloads starting at index list->idx, at most list->nr of them.
+ *
+ * Once the arguments passed validation, list->nr is overwritten with
+ * 'payload_cnt' minus the number of list entries walked and list->version
+ * with the current 'payload_version'.
+ *
+ * Returns the number of entries copied out, -E2BIG if list->nr exceeds
+ * 1024, -EINVAL for bad arguments, or -EFAULT if copying out failed.
+ */
+static int xsplice_list(xen_sysctl_xsplice_list_t *list)
+{
+    xen_xsplice_status_t status;
+    struct payload *data;
+    unsigned int idx = 0, i = 0;
+    int rc = 0;
+
+    if ( list->nr > 1024 )
+        return -E2BIG;
+
+    if ( list->pad != 0 )
+        return -EINVAL;
+
+    /*
+     * guest_handle_okay() takes a count of elements, not bytes: it scales
+     * by the size of the handle's element type itself. 'name' is a handle
+     * of char, hence XEN_XSPLICE_NAME_SIZE elements per entry.
+     */
+    if ( !guest_handle_okay(list->status, list->nr) ||
+         !guest_handle_okay(list->name, XEN_XSPLICE_NAME_SIZE * list->nr) ||
+         !guest_handle_okay(list->len, list->nr) )
+        return -EINVAL;
+
+    spin_lock(&payload_lock);
+    if ( list->idx > payload_cnt || !list->nr )
+    {
+        spin_unlock(&payload_lock);
+        return -EINVAL;
+    }
+
+    list_for_each_entry( data, &payload_list, list )
+    {
+        uint32_t len;
+
+        /* Skip the entries in front of the requested start index. */
+        if ( list->idx > i++ )
+            continue;
+
+        status.state = data->state;
+        status.rc = data->rc;
+        len = strlen(data->name);
+
+        /* N.B. 'idx' != 'i'. */
+        if ( __copy_to_guest_offset(list->name, idx * XEN_XSPLICE_NAME_SIZE,
+                                    data->name, len) ||
+             __copy_to_guest_offset(list->len, idx, &len, 1) ||
+             __copy_to_guest_offset(list->status, idx, &status, 1) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+        idx++;
+        /* Stop when the caller's arrays are full or we should yield. */
+        if ( hypercall_preempt_check() || (idx + 1 > list->nr) )
+            break;
+    }
+    list->nr = payload_cnt - i; /* Remaining amount. */
+    list->version = payload_version;
+    spin_unlock(&payload_lock);
+
+    /* And how many we have processed. */
+    return rc ? : idx;
+}
+
+/*
+ * XEN_SYSCTL_XSPLICE_ACTION: drive the state machine of the payload named
+ * by action->name. The lookup and the transition run under 'payload_lock'.
+ *
+ * Transitions (the actual patching is not implemented yet):
+ *   CHECK:   LOADED or CHECKED -> CHECKED
+ *   UNLOAD:  LOADED or CHECKED -> payload freed
+ *   REVERT:  APPLIED           -> CHECKED
+ *   APPLY:   CHECKED           -> APPLIED
+ *   REPLACE: CHECKED           -> CHECKED
+ *
+ * action->timeout is not consulted.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unknown command, or the error
+ * from find_payload() (which includes the name validation).
+ *
+ * NOTE(review): a known command issued in a state that does not permit it
+ * changes nothing and still returns 0 - confirm this is intended.
+ */
+static int xsplice_action(xen_sysctl_xsplice_action_t *action)
+{
+    struct payload *data;
+    int rc;
+
+    spin_lock(&payload_lock);
+    /* find_payload() starts with verify_name(); no need to do it twice. */
+    rc = find_payload(&action->name, 0 /* We are holding the lock. */, &data);
+    if ( rc )
+        goto out;
+
+    switch ( action->cmd )
+    {
+    case XSPLICE_ACTION_CHECK:
+        if ( (data->state == XSPLICE_STATE_LOADED) ||
+             (data->state == XSPLICE_STATE_CHECKED) )
+        {
+            /* No implementation yet. */
+            data->state = XSPLICE_STATE_CHECKED;
+            data->rc = 0;
+            rc = 0;
+        }
+        break;
+    case XSPLICE_ACTION_UNLOAD:
+        if ( (data->state == XSPLICE_STATE_LOADED) ||
+             (data->state == XSPLICE_STATE_CHECKED) )
+        {
+            free_payload(data);
+            /* No touching 'data' from here on! */
+            rc = 0;
+        }
+        break;
+    case XSPLICE_ACTION_REVERT:
+        if ( data->state == XSPLICE_STATE_APPLIED )
+        {
+            /* No implementation yet. */
+            data->state = XSPLICE_STATE_CHECKED;
+            data->rc = 0;
+            rc = 0;
+        }
+        break;
+    case XSPLICE_ACTION_APPLY:
+        if ( data->state == XSPLICE_STATE_CHECKED )
+        {
+            /* No implementation yet. */
+            data->state = XSPLICE_STATE_APPLIED;
+            data->rc = 0;
+            rc = 0;
+        }
+        break;
+    case XSPLICE_ACTION_REPLACE:
+        if ( data->state == XSPLICE_STATE_CHECKED )
+        {
+            /* No implementation yet. */
+            data->state = XSPLICE_STATE_CHECKED;
+            data->rc = 0;
+            rc = 0;
+        }
+        break;
+    default:
+        rc = -EOPNOTSUPP;
+        break;
+    }
+
+ out:
+    spin_unlock(&payload_lock);
+
+    return rc;
+}
+
+/*
+ * Entry point for XEN_SYSCTL_xsplice_op: check the padding and hand the
+ * request to the handler matching xsplice->cmd.
+ *
+ * Returns whatever the handler returns, -EINVAL if the padding is not zero
+ * and -EOPNOTSUPP for an unknown sub-command.
+ */
+int xsplice_control(xen_sysctl_xsplice_op_t *xsplice)
+{
+    if ( xsplice->pad )
+        return -EINVAL;
+
+    switch ( xsplice->cmd )
+    {
+    case XEN_SYSCTL_XSPLICE_UPLOAD:
+        return xsplice_upload(&xsplice->u.upload);
+
+    case XEN_SYSCTL_XSPLICE_GET:
+        return xsplice_get(&xsplice->u.get);
+
+    case XEN_SYSCTL_XSPLICE_LIST:
+        return xsplice_list(&xsplice->u.list);
+
+    case XEN_SYSCTL_XSPLICE_ACTION:
+        return xsplice_action(&xsplice->u.action);
+
+    default:
+        break;
+    }
+
+    return -EOPNOTSUPP;
+}
+
+/*
+ * Boot time setup: hook xsplice_printall() up to the 'x' debug key.
+ * Always returns 0.
+ */
+static int __init xsplice_init(void)
+{
+    register_keyhandler('x', xsplice_printall, "print xsplicing info", 1);
+    return 0;
+}
+__initcall(xsplice_init);
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 96680eb..d549e7a 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -766,6 +766,160 @@  struct xen_sysctl_tmem_op {
 typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
 
+/*
+ * XEN_SYSCTL_XSPLICE_op
+ *
+ * Refer to the http://xenbits.xenproject.org/docs/unstable/misc/xsplice.html
+ * for the design details of this hypercall.
+ */
+
+/*
+ * Structure describing an ELF payload. Uniquely identifies the
+ * payload. Should be human readable.
+ * Recommended length is up to XEN_XSPLICE_NAME_SIZE.
+ */
+#define XEN_XSPLICE_NAME_SIZE 128
+struct xen_xsplice_name {
+    XEN_GUEST_HANDLE_64(char) name;         /* IN: pointer to name. */
+    uint16_t size;                          /* IN: size of name. May be upto
+                                               XEN_XSPLICE_NAME_SIZE. */
+    uint16_t pad[3];                        /* IN: MUST be zero. */
+};
+typedef struct xen_xsplice_name xen_xsplice_name_t;
+DEFINE_XEN_GUEST_HANDLE(xen_xsplice_name_t);
+
+/*
+ * Upload a payload to the hypervisor. The payload is verified
+ * against basic checks and if there are any issues the proper return code
+ * will be returned. The payload is not applied at this time - that is
+ * controlled by XEN_SYSCTL_XSPLICE_ACTION.
+ *
+ * The return value is zero if the payload was successfully uploaded.
+ * Otherwise an EXX return value is provided. Duplicate `name` are not
+ * supported.
+ *
+ * The payload at this point is verified against the basic checks.
+ *
+ * The `payload` is the ELF payload as mentioned in the `Payload format`
+ * section in the xSplice design document.
+ */
+#define XEN_SYSCTL_XSPLICE_UPLOAD 0
+struct xen_sysctl_xsplice_upload {
+    xen_xsplice_name_t name;                /* IN, name of the patch. */
+    uint64_t size;                          /* IN, size of the ELF file. */
+    XEN_GUEST_HANDLE_64(uint8) payload;     /* IN, the ELF file. */
+};
+typedef struct xen_sysctl_xsplice_upload xen_sysctl_xsplice_upload_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_upload_t);
+
+/*
+ * Retrieve the status of a specific payload.
+ *
+ * Upon completion the `struct xen_xsplice_status` is updated.
+ *
+ * The return value is zero on success and XEN_EXX on failure. This operation
+ * is synchronous and does not require preemption.
+ */
+#define XEN_SYSCTL_XSPLICE_GET 1
+
+struct xen_xsplice_status {
+#define XSPLICE_STATE_LOADED       1
+#define XSPLICE_STATE_CHECKED      2
+#define XSPLICE_STATE_APPLIED      3
+    int32_t state;                 /* OUT: XSPLICE_STATE_*. IN: MUST be zero. */
+    int32_t rc;                    /* OUT: 0 if no error, otherwise -XEN_EXX. */
+                                   /* IN: MUST be zero. */
+};
+typedef struct xen_xsplice_status xen_xsplice_status_t;
+DEFINE_XEN_GUEST_HANDLE(xen_xsplice_status_t);
+
+struct xen_sysctl_xsplice_summary {
+    xen_xsplice_name_t name;                /* IN, name of the payload. */
+    xen_xsplice_status_t status;            /* IN/OUT, state of it. */
+};
+typedef struct xen_sysctl_xsplice_summary xen_sysctl_xsplice_summary_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_summary_t);
+
+/*
+ * Retrieve an array of abbreviated status and names of payloads that are
+ * loaded in the hypervisor.
+ *
+ * If the hypercall returns a positive number, it is the number (up to `nr`)
+ * of the payloads returned, along with `nr` updated with the number of remaining
+ * payloads, `version` updated (it may be the same across hypercalls. If it
+ * varies the data is stale and further calls could fail). The `status`,
+ * `name`, and `len` are updated at their designated index value (`idx`) with
+ * the returned value of data.
+ *
+ * If the hypercall returns E2BIG the `nr` is too big and should be
+ * lowered.
+ *
+ * This operation can be preempted by the hypercall returning EAGAIN.
+ * Retry.
+ *
+ * Note that due to the asynchronous nature of hypercalls the domain might have
+ * added or removed the number of payloads making this information stale. It is
+ * the responsibility of the toolstack to use the `version` field to check
+ * between each invocation. If the version differs it should discard the stale
+ * data and start from scratch. It is OK for the toolstack to use the new
+ * `version` field.
+ */
+#define XEN_SYSCTL_XSPLICE_LIST 2
+struct xen_sysctl_xsplice_list {
+    uint32_t version;                       /* IN/OUT: Initially *MUST* be zero.
+                                               On subsequent calls reuse value.
+                                               If varies between calls, we are
+                                               getting stale data. */
+    uint32_t idx;                           /* IN/OUT: Index into array. */
+    uint32_t nr;                            /* IN: How many status, name, and len
+                                               should fill out.
+                                               OUT: How many payloads left. */
+    uint32_t pad;                           /* IN: Must be zero. */
+    XEN_GUEST_HANDLE_64(xen_xsplice_status_t) status;  /* OUT. Must have enough
+                                               space allocated for nr of them. */
+    XEN_GUEST_HANDLE_64(char) name;         /* OUT: Array of names. Each member
+                                               MUST be XEN_XSPLICE_NAME_SIZE in size.
+                                               Must have nr of them. */
+    XEN_GUEST_HANDLE_64(uint32) len;        /* OUT: Array of lengths of name's.
+                                               Must have nr of them. */
+};
+typedef struct xen_sysctl_xsplice_list xen_sysctl_xsplice_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_list_t);
+
+/*
+ * Perform an operation on the payload structure referenced by the `name` field.
+ * The operation request is asynchronous and the status should be retrieved
+ * by using either XEN_SYSCTL_XSPLICE_GET or XEN_SYSCTL_XSPLICE_LIST hypercall.
+ */
+#define XEN_SYSCTL_XSPLICE_ACTION 3
+struct xen_sysctl_xsplice_action {
+    xen_xsplice_name_t name;                /* IN, name of the patch. */
+#define XSPLICE_ACTION_CHECK        1
+#define XSPLICE_ACTION_UNLOAD       2
+#define XSPLICE_ACTION_REVERT       3
+#define XSPLICE_ACTION_APPLY        4
+#define XSPLICE_ACTION_REPLACE      5
+    uint32_t cmd;                           /* IN: XSPLICE_ACTION_*. */
+    uint32_t timeout;                       /* IN: Zero if no timeout. */
+                                            /* Or upper bound of time (ms) */
+                                            /* for operation to take. */
+};
+typedef struct xen_sysctl_xsplice_action xen_sysctl_xsplice_action_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_action_t);
+
+struct xen_sysctl_xsplice_op {
+    uint32_t cmd;                           /* IN: XEN_SYSCTL_XSPLICE_*. */
+    uint32_t pad;                           /* IN: Always zero. */
+    union {
+        xen_sysctl_xsplice_upload_t upload;
+        xen_sysctl_xsplice_list_t list;
+        xen_sysctl_xsplice_summary_t get;
+        xen_sysctl_xsplice_action_t action;
+    } u;
+};
+typedef struct xen_sysctl_xsplice_op xen_sysctl_xsplice_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_op_t);
+
 struct xen_sysctl {
     uint32_t cmd;
 #define XEN_SYSCTL_readconsole                    1
@@ -791,6 +945,7 @@  struct xen_sysctl {
 #define XEN_SYSCTL_pcitopoinfo                   22
 #define XEN_SYSCTL_psr_cat_op                    23
 #define XEN_SYSCTL_tmem_op                       24
+#define XEN_SYSCTL_xsplice_op                    25
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
     union {
         struct xen_sysctl_readconsole       readconsole;
@@ -816,6 +971,7 @@  struct xen_sysctl {
         struct xen_sysctl_psr_cmt_op        psr_cmt_op;
         struct xen_sysctl_psr_cat_op        psr_cat_op;
         struct xen_sysctl_tmem_op           tmem_op;
+        struct xen_sysctl_xsplice_op        xsplice;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/xen/xsplice.h b/xen/include/xen/xsplice.h
new file mode 100644
index 0000000..cf465c4
--- /dev/null
+++ b/xen/include/xen/xsplice.h
@@ -0,0 +1,15 @@ 
+#ifndef __XEN_XSPLICE_H__
+#define __XEN_XSPLICE_H__
+
+/* Only used by pointer here, so a forward declaration is enough. */
+struct xen_sysctl_xsplice_op;
+
+#ifdef CONFIG_XSPLICE
+/* Handler for the xsplice sysctl; only built with CONFIG_XSPLICE. */
+int xsplice_control(struct xen_sysctl_xsplice_op *);
+#else
+#include <xen/errno.h> /* For -ENOSYS */
+/* Stub for builds without CONFIG_XSPLICE: always fails with -ENOSYS. */
+static inline int xsplice_control(struct xen_sysctl_xsplice_op *op)
+{
+    return -ENOSYS;
+}
+#endif
+#endif /* __XEN_XSPLICE_H__ */
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index f63c3e2..c856e1e 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -807,6 +807,12 @@  static int flask_sysctl(int cmd)
     case XEN_SYSCTL_tmem_op:
         return domain_has_xen(current->domain, XEN__TMEM_CONTROL);
 
+#ifdef CONFIG_XSPLICE
+    case XEN_SYSCTL_xsplice_op:
+        return avc_current_has_perm(SECINITSID_XEN, SECCLASS_XEN2,
+                                    XEN2__XSPLICE_OP, NULL);
+#endif
+
     default:
         printk("flask_sysctl: Unknown op %d\n", cmd);
         return -EPERM;
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index effb59f..5f08d05 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -93,6 +93,8 @@  class xen2
     pmu_ctrl
 # PMU use (domains, including unprivileged ones, will be using this operation)
     pmu_use
+# XEN_SYSCTL_xsplice_op
+    xsplice_op
 }
 
 # Classes domain and domain2 consist of operations that a domain performs on