diff mbox

[v4,04/34] HYPERCALL_version_op. New hypercall mirroring XENVER_ but sane.

Message ID 1458064616-23101-5-git-send-email-konrad.wilk@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Konrad Rzeszutek Wilk March 15, 2016, 5:56 p.m. UTC
This hypercall mirrors the XENVER_ in that it has similar functionality.
However it is designed differently:
 - No compat layer. The data structures are the same size on 32
   as on 64-bit.
 - The hypercall accepts three arguments - the command, a pointer to
   a buffer, and the length of the buffer.
 - Each sub-op can be "probed" for size: if the buffer is NULL, the
   hypercall returns the size of the buffer that will be needed.
 - Subops can complete even if the buffer is too small - truncated
   data will be filled in and the hypercall will return -ENOBUFS.
 - VERSION_OP_commandline, VERSION_OP_changeset are privileged.
 - There is no XENVER_compile_info equivalent.
 - The hypercall can return -EPERM and toolstack/OSes are expected
   to deal with it.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

---
Cc: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@citrix.com>
Cc: Julien Grall <julien.grall@arm.com>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
---
 tools/flask/policy/policy/modules/xen/xen.te |   9 +-
 xen/arch/arm/traps.c                         |   1 +
 xen/arch/x86/hvm/hvm.c                       |   1 +
 xen/arch/x86/x86_64/compat/entry.S           |   2 +
 xen/arch/x86/x86_64/entry.S                  |   2 +
 xen/common/compat/kernel.c                   |   3 +
 xen/common/kernel.c                          | 265 +++++++++++++++++++++++----
 xen/include/public/arch-arm.h                |   3 +
 xen/include/public/version.h                 |  72 +++++++-
 xen/include/public/xen.h                     |   1 +
 xen/include/xen/hypercall.h                  |   4 +
 xen/include/xsm/dummy.h                      |  19 ++
 xen/include/xsm/xsm.h                        |   7 +
 xen/xsm/dummy.c                              |   1 +
 xen/xsm/flask/hooks.c                        |  39 ++++
 xen/xsm/flask/policy/access_vectors          |  24 ++-
 16 files changed, 410 insertions(+), 43 deletions(-)

Comments

Andrew Cooper March 15, 2016, 6:29 p.m. UTC | #1
On 15/03/16 17:56, Konrad Rzeszutek Wilk wrote:
> @@ -388,6 +395,188 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>      return -ENOSYS;
>  }
>  
> +static const char *capabilities_info(ssize_t *len)
> +{
> +    static xen_capabilities_info_t cached_cap;
> +    static unsigned int cached_cap_len;
> +    static bool_t cached;
> +
> +    if ( cached )

I am surprised that Coverity didn't complain about this being unused...

> +    {
> +        *len = cached_cap_len;
> +        return cached_cap;
> +    }
> +    arch_get_xen_caps(&cached_cap);
> +    cached_cap_len = strlen(cached_cap) + 1;
> +
> +    *len = cached_cap_len;
> +    return cached_cap;

You can turn the logic around as

if ( unlikely(!cached) )
{
    arch_get_xen_caps(&cached_cap);
    cached_cap_len = strlen(cached_cap) + 1;
    cached = 1;
}

and have a single return path.

> +}
> +
> +static int size_of_subops_data(unsigned int cmd, ssize_t *sz)
> +{
> +    int rc = 0;
> +    /* Compute size. */
> +    switch ( cmd )
> +    {
> +    case XEN_VERSION_OP_version:
> +        *sz = sizeof(xen_version_op_val_t);
> +        break;
> +
> +    case XEN_VERSION_OP_extraversion:
> +        *sz = strlen(xen_extra_version()) + 1;
> +        break;
> +
> +    case XEN_VERSION_OP_capabilities:
> +        capabilities_info(sz);
> +        break;
> +
> +    case XEN_VERSION_OP_platform_parameters:
> +        *sz = sizeof(xen_version_op_val_t);
> +        break;
> +
> +    case XEN_VERSION_OP_changeset:
> +        *sz = strlen(xen_changeset()) + 1;
> +        break;
> +
> +    case XEN_VERSION_OP_get_features:
> +        *sz = sizeof(xen_feature_info_t);
> +        break;
> +
> +    case XEN_VERSION_OP_pagesize:
> +        *sz = sizeof(xen_version_op_val_t);
> +        break;
> +
> +    case XEN_VERSION_OP_guest_handle:
> +        *sz = ARRAY_SIZE(current->domain->handle);
> +        break;
> +
> +    case XEN_VERSION_OP_commandline:
> +        *sz = ARRAY_SIZE(saved_cmdline);
> +        break;
> +
> +    default:
> +        rc = -ENOSYS;
> +    }
> +
> +    return rc;
> +}
> +
> +/*
> + * Similar to HYPERVISOR_xen_version but with a sane interface
> + * (has a length, one can probe for the length) and with one less sub-ops:
> + * missing XENVER_compile_info.
> + */
> +DO(version_op)(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg,
> +               unsigned int len)
> +{
> +    union {
> +        xen_version_op_val_t n;
> +        xen_feature_info_t fi;
> +    } u;

= {}; and you can forgo the explicit memset() below.

> +    ssize_t sz = 0;
> +    const void *ptr = NULL;
> +    int rc = xsm_version_op(XSM_OTHER, cmd);
> +
> +    /* We can safely return -EPERM! */
> +    if ( rc )
> +        return rc;
> +
> +    rc = size_of_subops_data(cmd, &sz);
> +    if ( rc )
> +        return rc;
> +
> +    /* Some of the subops may have no data. */
> +    if ( !sz )
> +        return 0;

Really? I would have thought it would be reasonable to assert that
sz != 0 after the rc != 0 return.

> +    /*
> +     * This hypercall also allows the client to probe. If it provides
> +     * a NULL arg we will return the size of the space it has to
> +     * allocate for the specific sub-op.
> +     */
> +    if ( guest_handle_is_null(arg) )
> +        return sz;
> +
> +    memset(&u, 0, sizeof(u));
> +    /*
> +     * The HYPERVISOR_xen_version differs in that some return the value,
> +     * and some copy it on back on argument. We follow the same rule for all
> +     * sub-ops: return 0 on success, positive value of bytes returned, and
> +     * always copy the result in arg. Yeey sanity!
> +     */
> +
> +    rc = 0;

rc is guaranteed to be 0 at this point.

> +    switch ( cmd )
> +    {
> +    case XEN_VERSION_OP_version:
> +        u.n = (xen_major_version() << 16) | xen_minor_version();
> +        break;
> +
> +    case XEN_VERSION_OP_extraversion:
> +        ptr = xen_extra_version();
> +        break;
> +
> +    case XEN_VERSION_OP_capabilities:
> +        ptr = capabilities_info(&sz);
> +        break;
> +
> +    case XEN_VERSION_OP_platform_parameters:
> +        u.n = HYPERVISOR_VIRT_START;
> +        break;
> +
> +    case XEN_VERSION_OP_changeset:
> +        ptr = xen_changeset();
> +        break;
> +
> +    case XEN_VERSION_OP_get_features:
> +        if ( copy_from_guest(&u.fi, arg, 1) )
> +        {
> +            rc = -EFAULT;
> +            break;
> +        }
> +        rc = get_features(current->domain, &u.fi);
> +        break;
> +
> +    case XEN_VERSION_OP_pagesize:
> +        u.n = PAGE_SIZE;
> +        break;
> +
> +    case XEN_VERSION_OP_guest_handle:
> +        ptr = current->domain->handle;
> +        break;
> +
> +    case XEN_VERSION_OP_commandline:
> +        ptr = saved_cmdline;
> +        break;
> +
> +    default:
> +        rc = -ENOSYS;
> +    }
> +
> +    if ( !rc )
> +    {
> +        ssize_t bytes;
> +
> +        if ( sz > len )
> +            bytes = len;
> +        else
> +            bytes = sz;
> +
> +        if ( copy_to_guest(arg, ptr ? ptr : &u, bytes) )

Can be shortened to ptr ?: &u

> +            rc = -EFAULT;
> +    }
> +    if ( !rc )
> +    {
> +        /*
> +         * We return len (truncate) worth of data even if we fail.
> +         */
> +        if ( sz > len )
> +            rc = -ENOBUFS;

This needs to be in the previous if() clause to avoid overriding -EFAULT
with -ENOBUFS.

> +
> +/*
> + * The HYPERCALL_version_op has a set of sub-ops which mirror the
> + * sub-ops of HYPERCALL_xen_version. However this hypercall differs
> + * radically from the former:
> + *  - It returns the amount of bytes returned.
> + *  - It will return -XEN_EPERM if the guest is not permitted.
> + *  - It will return the requested data in arg.
> + *  - It requires an third argument (len) for the length of the
> + *    arg. Naturally the arg has to fit the requested data otherwise
> + *    -XEN_ENOBUFS is returned.
> + *
> + * It also offers an mechanism to probe for the amount of bytes an
> + * sub-op will require. Having the arg have an NULL pointer will
> + * return the number of bytes requested for the operation. Or an
> + * negative value if an error is encountered.
> + */
> +
> +typedef uint64_t xen_version_op_val_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_version_op_val_t);
> +
> +typedef unsigned char xen_version_op_buf_t[];
> +DEFINE_XEN_GUEST_HANDLE(xen_version_op_buf_t);

Strictly speaking this should be a void* guest handle, as not all data
returned via this mechanism is unsigned char.

> +
> +/* arg == version_op_val_t. Encoded as major:minor (31..16:15..0) */
> +#define XEN_VERSION_OP_version      0
> +
> +/* arg == version_op_buf. */
> +#define XEN_VERSION_OP_extraversion 1
> +
> +/* arg == version_op_buf */
> +#define XEN_VERSION_OP_capabilities 3
> +
> +/* arg == version_op_buf */
> +#define XEN_VERSION_OP_changeset 4

Might be worth stating that these return NUL terminated utf-8 strings?

~Andrew
Konrad Rzeszutek Wilk March 15, 2016, 8:19 p.m. UTC | #2
.. snip ..
> > +    case XEN_VERSION_OP_guest_handle:
> > +        *sz = ARRAY_SIZE(current->domain->handle);
> > +        break;
> > +
> > +    case XEN_VERSION_OP_commandline:
> > +        *sz = ARRAY_SIZE(saved_cmdline);
> > +        break;
> > +
> > +    default:
> > +        rc = -ENOSYS;
> > +    }
> > +
> > +    return rc;
> > +}
> > +
> > +/*
> > + * Similar to HYPERVISOR_xen_version but with a sane interface
> > + * (has a length, one can probe for the length) and with one less sub-ops:
> > + * missing XENVER_compile_info.
> > + */
> > +DO(version_op)(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg,
> > +               unsigned int len)
> > +{
> > +    union {
> > +        xen_version_op_val_t n;
> > +        xen_feature_info_t fi;
> > +    } u;
> 
> = {}; and you can forgo the explicit memset() below.

Done!
> 
> > +    ssize_t sz = 0;
> > +    const void *ptr = NULL;
> > +    int rc = xsm_version_op(XSM_OTHER, cmd);
> > +
> > +    /* We can safely return -EPERM! */
> > +    if ( rc )
> > +        return rc;
> > +
> > +    rc = size_of_subops_data(cmd, &sz);
> > +    if ( rc )
> > +        return rc;
> > +
> > +    /* Some of the subops may have no data. */
> > +    if ( !sz )
> > +        return 0;
> 
> Really? I would have thought it would be reasonable to assert that
> either sz != 0 after the rc != 0 return.

Commandline and guest_handle may be empty. Ah, they aren't, as
they are arrays.

ARRAY_SIZE(saved_commandline) is always 1024. Ugh.

.. snip..

> > +
> > +    if ( !rc )
> > +    {
> > +        ssize_t bytes;
> > +
> > +        if ( sz > len )
> > +            bytes = len;
> > +        else
> > +            bytes = sz;
> > +
> > +        if ( copy_to_guest(arg, ptr ? ptr : &u, bytes) )
> 
> Can be shortened to ptr ?: &u
> 
> > +            rc = -EFAULT;
> > +    }
> > +    if ( !rc )

         ^^^^^^^^^ - here
> > +    {
> > +        /*
> > +         * We return len (truncate) worth of data even if we fail.
> > +         */
> > +        if ( sz > len )
> > +            rc = -ENOBUFS;
> 
> This needs to be in the previous if() clause to avoid overriding -EFAULT
> with -ENOBUFS.

That is exactly why it is in its own 'if ( !rc )' - so it won't
overwrite -EFAULT. See above for 'here'


> 
> > +
> > +/*
> > + * The HYPERCALL_version_op has a set of sub-ops which mirror the
> > + * sub-ops of HYPERCALL_xen_version. However this hypercall differs
> > + * radically from the former:
> > + *  - It returns the amount of bytes returned.
> > + *  - It will return -XEN_EPERM if the guest is not permitted.
> > + *  - It will return the requested data in arg.
> > + *  - It requires an third argument (len) for the length of the
> > + *    arg. Naturally the arg has to fit the requested data otherwise
> > + *    -XEN_ENOBUFS is returned.
> > + *
> > + * It also offers an mechanism to probe for the amount of bytes an
> > + * sub-op will require. Having the arg have an NULL pointer will
> > + * return the number of bytes requested for the operation. Or an
> > + * negative value if an error is encountered.
> > + */
> > +
> > +typedef uint64_t xen_version_op_val_t;
> > +DEFINE_XEN_GUEST_HANDLE(xen_version_op_val_t);
> > +
> > +typedef unsigned char xen_version_op_buf_t[];
> > +DEFINE_XEN_GUEST_HANDLE(xen_version_op_buf_t);
> 
> Strictly speaking this should be a void* guest handle, as not all data
> is returned via this mechanism is unsigned char.

Done!
> 
> > +
> > +/* arg == version_op_val_t. Encoded as major:minor (31..16:15..0) */
> > +#define XEN_VERSION_OP_version      0
> > +
> > +/* arg == version_op_buf. */
> > +#define XEN_VERSION_OP_extraversion 1
> > +
> > +/* arg == version_op_buf */
> > +#define XEN_VERSION_OP_capabilities 3
> > +
> > +/* arg == version_op_buf */
> > +#define XEN_VERSION_OP_changeset 4
> 
> Might be worth stating that these return NUL terminated utf-8 strings?

Done!
> 
> ~Andrew
Daniel De Graaf March 22, 2016, 5:51 p.m. UTC | #3
On 03/15/2016 01:56 PM, Konrad Rzeszutek Wilk wrote:
> This hypercall mirrors the XENVER_ in that it has similar functionality.
> However it is designed differently:
>   - No compat layer. The data structures are the same size on 32
>     as on 64-bit.
>   - The hypercall accepts three arguments - the command, pointer to
>     an buffer, and the length of the buffer.
>   - Each sub-ops can be "probed" for size by returning the size of
>     buffer that will be needed - if the buffer is NULL.
>   - Subops can complete even if the buffer is too slow - truncated
>     data will be filled and hypercall will return -ENOBUFS.
>   - VERSION_OP_commandline, VERSION_OP_changeset are privileged.
>   - There are no XENVER_compile_info equivalent.
>   - The hypercall can return -EPERM and toolstack/OSes are expected
>     to deal with it.
>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

Acked-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
diff mbox

Patch

diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
index 7e7400d..bea40c1 100644
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -74,12 +74,14 @@  allow dom0_t xen_t:xen2 {
     get_symbol
 };
 
-# Allow dom0 to use all XENVER_ subops
+# Allow dom0 to use all XENVER_ subops and VERSION_OP subops
 # Note that dom0 is part of domain_type so this has duplicates.
 allow dom0_t xen_t:version {
     xen_version xen_extraversion xen_compile_info xen_capabilities
     xen_changeset xen_platform_parameters xen_get_features xen_pagesize
     xen_guest_handle xen_commandline
+    version extraversion capabilities changeset platform_parameters
+    get_features pagesize guest_handle commandline
 };
 
 allow dom0_t xen_t:mmu memorymap;
@@ -146,11 +148,14 @@  if (guest_writeconsole) {
 # pmu_ctrl is for)
 allow domain_type xen_t:xen2 pmu_use;
 
-# For normal guests all except XENVER_commandline
+# For normal guests all except XENVER_commandline, VERSION_OP_changeset,
+# and VERSION_OP_commandline
 allow domain_type xen_t:version {
     xen_version xen_extraversion xen_compile_info xen_capabilities
     xen_changeset xen_platform_parameters xen_get_features xen_pagesize
     xen_guest_handle
+    version extraversion capabilities  platform_parameters
+    get_features pagesize guest_handle
 };
 ###############################################################################
 #
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 83744e8..31d2115 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1235,6 +1235,7 @@  static arm_hypercall_t arm_hypercall_table[] = {
     HYPERCALL(multicall, 2),
     HYPERCALL(platform_op, 1),
     HYPERCALL_ARM(vcpu_op, 3),
+    HYPERCALL(version_op, 3),
 };
 
 #ifndef NDEBUG
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 255a1d6..56b9f6b 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5332,6 +5332,7 @@  static const struct {
     COMPAT_CALL(platform_op),
     COMPAT_CALL(mmuext_op),
     HYPERCALL(xenpmu_op),
+    HYPERCALL(version_op),
     HYPERCALL(arch_1)
 };
 
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 927439d..8715945 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -391,6 +391,7 @@  ENTRY(compat_hypercall_table)
         .quad do_tmem_op
         .quad do_ni_hypercall           /* reserved for XenClient */
         .quad do_xenpmu_op              /* 40 */
+        .quad do_version_op
         .rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
         .quad compat_ni_hypercall
         .endr
@@ -442,6 +443,7 @@  ENTRY(compat_hypercall_args_table)
         .byte 1 /* do_tmem_op               */
         .byte 0 /* reserved for XenClient   */
         .byte 2 /* do_xenpmu_op             */  /* 40 */
+        .byte 3 /* do_version_op            */
         .rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
         .byte 0 /* compat_ni_hypercall      */
         .endr
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index dd7f114..178dc3a 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -727,6 +727,7 @@  ENTRY(hypercall_table)
         .quad do_tmem_op
         .quad do_ni_hypercall       /* reserved for XenClient */
         .quad do_xenpmu_op          /* 40 */
+        .quad do_version_op
         .rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
         .quad do_ni_hypercall
         .endr
@@ -778,6 +779,7 @@  ENTRY(hypercall_args_table)
         .byte 1 /* do_tmem_op           */
         .byte 0 /* reserved for XenClient */
         .byte 2 /* do_xenpmu_op         */  /* 40 */
+        .byte 3 /* do_version_op        */
         .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff --git a/xen/common/compat/kernel.c b/xen/common/compat/kernel.c
index e1b9013..e98ba7d 100644
--- a/xen/common/compat/kernel.c
+++ b/xen/common/compat/kernel.c
@@ -38,6 +38,9 @@  CHECK_TYPE(capabilities_info);
 
 CHECK_TYPE(domain_handle);
 
+CHECK_TYPE(version_op_buf);
+CHECK_TYPE(version_op_val);
+
 #define xennmi_callback compat_nmi_callback
 #define xennmi_callback_t compat_nmi_callback_t
 
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 2699ac0..f06b3d9 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -221,6 +221,47 @@  void __init do_initcalls(void)
 
 #endif
 
+static int get_features(struct domain *d, xen_feature_info_t *fi)
+{
+    switch ( fi->submap_idx )
+    {
+    case 0:
+        fi->submap = (1U << XENFEAT_memory_op_vnode_supported);
+        if ( VM_ASSIST(d, pae_extended_cr3) )
+            fi->submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
+        if ( paging_mode_translate(d) )
+            fi->submap |= 
+                (1U << XENFEAT_writable_page_tables) |
+                (1U << XENFEAT_auto_translated_physmap);
+        if ( is_hardware_domain(d) )
+            fi->submap |= 1U << XENFEAT_dom0;
+#ifdef CONFIG_X86
+        switch ( d->guest_type )
+        {
+        case guest_type_pv:
+            fi->submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
+                          (1U << XENFEAT_highmem_assist) |
+                          (1U << XENFEAT_gnttab_map_avail_bits);
+            break;
+        case guest_type_pvh:
+            fi->submap |= (1U << XENFEAT_hvm_safe_pvclock) |
+                          (1U << XENFEAT_supervisor_mode_kernel) |
+                          (1U << XENFEAT_hvm_callback_vector);
+            break;
+        case guest_type_hvm:
+            fi->submap |= (1U << XENFEAT_hvm_safe_pvclock) |
+                          (1U << XENFEAT_hvm_callback_vector) |
+                          (1U << XENFEAT_hvm_pirqs);
+           break;
+        }
+#endif
+        break;
+    default:
+        return -EINVAL;
+    }
+    return 0;
+}
+
 /*
  * Simple hypercalls.
  */
@@ -302,50 +343,16 @@  DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case XENVER_get_features:
     {
         xen_feature_info_t fi;
-        struct domain *d = current->domain;
 
         if ( copy_from_guest(&fi, arg, 1) )
             return -EFAULT;
 
-        switch ( fi.submap_idx )
+        if ( !deny )
         {
-        case 0:
-            if ( deny )
-                break;
-            fi.submap = (1U << XENFEAT_memory_op_vnode_supported);
-            if ( VM_ASSIST(d, pae_extended_cr3) )
-                fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
-            if ( paging_mode_translate(d) )
-                fi.submap |= 
-                    (1U << XENFEAT_writable_page_tables) |
-                    (1U << XENFEAT_auto_translated_physmap);
-            if ( is_hardware_domain(d) )
-                fi.submap |= 1U << XENFEAT_dom0;
-#ifdef CONFIG_X86
-            switch ( d->guest_type )
-            {
-            case guest_type_pv:
-                fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
-                             (1U << XENFEAT_highmem_assist) |
-                             (1U << XENFEAT_gnttab_map_avail_bits);
-                break;
-            case guest_type_pvh:
-                fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) |
-                             (1U << XENFEAT_supervisor_mode_kernel) |
-                             (1U << XENFEAT_hvm_callback_vector);
-                break;
-            case guest_type_hvm:
-                fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) |
-                             (1U << XENFEAT_hvm_callback_vector) |
-                             (1U << XENFEAT_hvm_pirqs);
-                break;
-            }
-#endif
-            break;
-        default:
-            return -EINVAL;
+            int rc = get_features(current->domain, &fi);
+            if ( rc )
+                return rc;
         }
-
         if ( __copy_to_guest(arg, &fi, 1) )
             return -EFAULT;
         return 0;
@@ -388,6 +395,188 @@  DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     return -ENOSYS;
 }
 
+static const char *capabilities_info(ssize_t *len)
+{
+    static xen_capabilities_info_t cached_cap;
+    static unsigned int cached_cap_len;
+    static bool_t cached;
+
+    if ( cached )
+    {
+        *len = cached_cap_len;
+        return cached_cap;
+    }
+    arch_get_xen_caps(&cached_cap);
+    cached_cap_len = strlen(cached_cap) + 1;
+
+    *len = cached_cap_len;
+    return cached_cap;
+}
+
+static int size_of_subops_data(unsigned int cmd, ssize_t *sz)
+{
+    int rc = 0;
+    /* Compute size. */
+    switch ( cmd )
+    {
+    case XEN_VERSION_OP_version:
+        *sz = sizeof(xen_version_op_val_t);
+        break;
+
+    case XEN_VERSION_OP_extraversion:
+        *sz = strlen(xen_extra_version()) + 1;
+        break;
+
+    case XEN_VERSION_OP_capabilities:
+        capabilities_info(sz);
+        break;
+
+    case XEN_VERSION_OP_platform_parameters:
+        *sz = sizeof(xen_version_op_val_t);
+        break;
+
+    case XEN_VERSION_OP_changeset:
+        *sz = strlen(xen_changeset()) + 1;
+        break;
+
+    case XEN_VERSION_OP_get_features:
+        *sz = sizeof(xen_feature_info_t);
+        break;
+
+    case XEN_VERSION_OP_pagesize:
+        *sz = sizeof(xen_version_op_val_t);
+        break;
+
+    case XEN_VERSION_OP_guest_handle:
+        *sz = ARRAY_SIZE(current->domain->handle);
+        break;
+
+    case XEN_VERSION_OP_commandline:
+        *sz = ARRAY_SIZE(saved_cmdline);
+        break;
+
+    default:
+        rc = -ENOSYS;
+    }
+
+    return rc;
+}
+
+/*
+ * Similar to HYPERVISOR_xen_version but with a sane interface
+ * (has a length, one can probe for the length) and with one less sub-ops:
+ * missing XENVER_compile_info.
+ */
+DO(version_op)(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg,
+               unsigned int len)
+{
+    union {
+        xen_version_op_val_t n;
+        xen_feature_info_t fi;
+    } u;
+    ssize_t sz = 0;
+    const void *ptr = NULL;
+    int rc = xsm_version_op(XSM_OTHER, cmd);
+
+    /* We can safely return -EPERM! */
+    if ( rc )
+        return rc;
+
+    rc = size_of_subops_data(cmd, &sz);
+    if ( rc )
+        return rc;
+
+    /* Some of the subops may have no data. */
+    if ( !sz )
+        return 0;
+    /*
+     * This hypercall also allows the client to probe. If it provides
+     * a NULL arg we will return the size of the space it has to
+     * allocate for the specific sub-op.
+     */
+    if ( guest_handle_is_null(arg) )
+        return sz;
+
+    memset(&u, 0, sizeof(u));
+    /*
+     * HYPERVISOR_xen_version is inconsistent: some sub-ops return the value
+     * and some copy it back into the argument. We follow one rule for all
+     * sub-ops: always copy the result into arg and return the number of
+     * bytes on success (negative errno on failure). Yeey sanity!
+     */
+
+    rc = 0;
+    switch ( cmd )
+    {
+    case XEN_VERSION_OP_version:
+        u.n = (xen_major_version() << 16) | xen_minor_version();
+        break;
+
+    case XEN_VERSION_OP_extraversion:
+        ptr = xen_extra_version();
+        break;
+
+    case XEN_VERSION_OP_capabilities:
+        ptr = capabilities_info(&sz);
+        break;
+
+    case XEN_VERSION_OP_platform_parameters:
+        u.n = HYPERVISOR_VIRT_START;
+        break;
+
+    case XEN_VERSION_OP_changeset:
+        ptr = xen_changeset();
+        break;
+
+    case XEN_VERSION_OP_get_features:
+        if ( copy_from_guest(&u.fi, arg, 1) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+        rc = get_features(current->domain, &u.fi);
+        break;
+
+    case XEN_VERSION_OP_pagesize:
+        u.n = PAGE_SIZE;
+        break;
+
+    case XEN_VERSION_OP_guest_handle:
+        ptr = current->domain->handle;
+        break;
+
+    case XEN_VERSION_OP_commandline:
+        ptr = saved_cmdline;
+        break;
+
+    default:
+        rc = -ENOSYS;
+    }
+
+    if ( !rc )
+    {
+        ssize_t bytes;
+
+        if ( sz > len )
+            bytes = len;
+        else
+            bytes = sz;
+
+        if ( copy_to_guest(arg, ptr ? ptr : &u, bytes) )
+            rc = -EFAULT;
+    }
+    if ( !rc )
+    {
+        /*
+         * We return len (truncate) worth of data even if we fail.
+         */
+        if ( sz > len )
+            rc = -ENOBUFS;
+    }
+
+    return rc == 0 ? sz : rc;
+}
+
 DO(nmi_op)(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     struct xennmi_callback cb;
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index 870bc3b..c9ae315 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -128,6 +128,9 @@ 
  *    * VCPUOP_register_vcpu_info
  *    * VCPUOP_register_runstate_memory_area
  *
+ *  HYPERVISOR_version_op
+ *   All generic sub-operations
+ *
  *
  * Other notes on the ARM ABI:
  *
diff --git a/xen/include/public/version.h b/xen/include/public/version.h
index 24a582f..4ceb97b 100644
--- a/xen/include/public/version.h
+++ b/xen/include/public/version.h
@@ -30,7 +30,15 @@ 
 
 #include "xen.h"
 
-/* NB. All ops return zero on success, except XENVER_{version,pagesize} */
+/*
+ * There are two hypercalls mentioned in here. The XENVER_ are for
+ * HYPERCALL_xen_version (17), while VERSION_OP_ are for the
+ * HYPERCALL_version_op (41).
+ *
+ * The subops are very similar except that the latter hypercall has a
+ * sane interface.
+ */
+
 
 /* arg == NULL; returns major:minor (16:16). */
 #define XENVER_version      0
@@ -87,6 +95,68 @@  typedef struct xen_feature_info xen_feature_info_t;
 #define XENVER_commandline 9
 typedef char xen_commandline_t[1024];
 
+
+
+/*
+ * The HYPERCALL_version_op has a set of sub-ops which mirror the
+ * sub-ops of HYPERCALL_xen_version. However this hypercall differs
+ * radically from the former:
+ *  - It returns the amount of bytes returned.
+ *  - It will return -XEN_EPERM if the guest is not permitted.
+ *  - It will return the requested data in arg.
+ *  - It requires a third argument (len) for the length of the
+ *    arg. Naturally the arg has to fit the requested data, otherwise
+ *    -XEN_ENOBUFS is returned.
+ *
+ * It also offers a mechanism to probe for the number of bytes a
+ * sub-op will require. Passing a NULL pointer as the arg will
+ * return the number of bytes required for the operation, or a
+ * negative value if an error is encountered.
+ */
+
+typedef uint64_t xen_version_op_val_t;
+DEFINE_XEN_GUEST_HANDLE(xen_version_op_val_t);
+
+typedef unsigned char xen_version_op_buf_t[];
+DEFINE_XEN_GUEST_HANDLE(xen_version_op_buf_t);
+
+/* arg == version_op_val_t. Encoded as major:minor (31..16:15..0) */
+#define XEN_VERSION_OP_version      0
+
+/* arg == version_op_buf. */
+#define XEN_VERSION_OP_extraversion 1
+
+/* arg == version_op_buf */
+#define XEN_VERSION_OP_capabilities 3
+
+/* arg == version_op_buf */
+#define XEN_VERSION_OP_changeset 4
+
+/*
+ * arg == xen_version_op_val_t. Contains the virtual address
+ * of the hypervisor encoded as [63..0].
+ */
+#define XEN_VERSION_OP_platform_parameters 5
+
+/*
+ * arg = xen_feature_info_t - shares the same structure
+ * as the XENVER_get_features.
+ */
+#define XEN_VERSION_OP_get_features 6
+
+/* arg == xen_version_op_val_t */
+#define XEN_VERSION_OP_pagesize 7
+
+/* arg == version_op_buf.
+ *
+ * The toolstack fills it out for guest consumption. It is intended to hold
+ * the UUID of the guest.
+ */
+#define XEN_VERSION_OP_guest_handle 8
+
+/* arg = version_op_buf */
+#define XEN_VERSION_OP_commandline 9
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
 
 /*
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 64ba7ab..1a99929 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -115,6 +115,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
 #define __HYPERVISOR_tmem_op              38
 #define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
 #define __HYPERVISOR_xenpmu_op            40
+#define __HYPERVISOR_version_op           41 /* supersedes xen_version (17) */
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h
index 26cb615..00e4245 100644
--- a/xen/include/xen/hypercall.h
+++ b/xen/include/xen/hypercall.h
@@ -143,6 +143,10 @@  do_xenoprof_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg);
 extern long
 do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg);
 
+extern long
+do_version_op(unsigned int cmd,
+    XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int len);
+
 #ifdef CONFIG_COMPAT
 
 extern int
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 94b8855..8c6ae90 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -749,3 +749,22 @@  static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG uint32_t op)
         return xsm_default_action(XSM_PRIV, current->domain, NULL);
     }
 }
+
+static XSM_INLINE int xsm_version_op (XSM_DEFAULT_ARG uint32_t op)
+{
+    XSM_ASSERT_ACTION(XSM_OTHER);
+    switch ( op )
+    {
+    case XEN_VERSION_OP_version:
+    case XEN_VERSION_OP_extraversion:
+    case XEN_VERSION_OP_capabilities:
+    case XEN_VERSION_OP_platform_parameters:
+    case XEN_VERSION_OP_get_features:
+    case XEN_VERSION_OP_pagesize:
+    case XEN_VERSION_OP_guest_handle:
+        /* These MUST always be accessible to any guest by default. */
+        return xsm_default_action(XSM_HOOK, current->domain, NULL);
+    default:
+        return xsm_default_action(XSM_PRIV, current->domain, NULL);
+    }
+}
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index db440f6..ac80472 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -194,6 +194,7 @@  struct xsm_operations {
     int (*pmu_op) (struct domain *d, unsigned int op);
 #endif
     int (*xen_version) (uint32_t cmd);
+    int (*version_op) (uint32_t cmd);
 };
 
 #ifdef CONFIG_XSM
@@ -736,6 +737,12 @@  static inline int xsm_xen_version (xsm_default_t def, uint32_t op)
 {
     return xsm_ops->xen_version(op);
 }
+
+static inline int xsm_version_op (xsm_default_t def, uint32_t op)
+{
+    return xsm_ops->version_op(op);
+}
+
 #endif /* XSM_NO_WRAPPERS */
 
 #ifdef CONFIG_MULTIBOOT
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 9791ad4..776dd09 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -163,4 +163,5 @@  void xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, pmu_op);
 #endif
     set_to_dummy_if_null(ops, xen_version);
+    set_to_dummy_if_null(ops, version_op);
 }
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index d1bef43..2510229 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1662,6 +1662,44 @@  static int flask_xen_version (uint32_t op)
     }
 }
 
+static int flask_version_op (uint32_t op)
+{
+    u32 dsid = domain_sid(current->domain);
+
+    switch ( op )
+    {
+    case XEN_VERSION_OP_version:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__VERSION, NULL);
+    case XEN_VERSION_OP_extraversion:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__EXTRAVERSION, NULL);
+    case XEN_VERSION_OP_capabilities:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__CAPABILITIES, NULL);
+    case XEN_VERSION_OP_changeset:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__CHANGESET, NULL);
+    case XEN_VERSION_OP_platform_parameters:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__PLATFORM_PARAMETERS, NULL);
+    case XEN_VERSION_OP_get_features:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__GET_FEATURES, NULL);
+    case XEN_VERSION_OP_pagesize:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__PAGESIZE, NULL);
+    case XEN_VERSION_OP_guest_handle:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__GUEST_HANDLE, NULL);
+    case XEN_VERSION_OP_commandline:
+        return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_VERSION,
+                            VERSION__COMMANDLINE, NULL);
+    default:
+        return -EPERM;
+    }
+}
+
 long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
 int compat_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
 
@@ -1801,6 +1839,7 @@  static struct xsm_operations flask_ops = {
     .pmu_op = flask_pmu_op,
 #endif
     .xen_version = flask_xen_version,
+    .version_op = flask_version_op,
 };
 
 static __init void flask_init(void)
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 628dd5c..59c9f69 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -496,9 +496,10 @@  class security
     del_ocontext
 }
 
-# Class version is used to describe the XENVER_ hypercall.
+# Class version is used to describe the XENVER_ and VERSION_OP hypercalls.
 # Each sub-ops is described here - in the default case all of them should
-# be allowed except the XENVER_commandline.
+# be allowed except the XENVER_commandline, VERSION_OP_commandline, and
+# VERSION_OP_changeset.
 #
 class version
 {
@@ -522,4 +523,23 @@  class version
     xen_guest_handle
 # Xen command line.
     xen_commandline
+
+# Often called by PV kernels to force a callback.
+    version
+# Extra information (-unstable).
+    extraversion
+# Such as "xen-3.0-x86_64 xen-3.0-x86_32p hvm-3.0-x86_32 hvm-3.0-x86_32p hvm-3.0-x86_64".
+    capabilities
+# Such as the virtual address of where the hypervisor resides.
+    platform_parameters
+# Source code changeset.
+    changeset
+# The features the hypervisor supports.
+    get_features
+# Page size the hypervisor uses.
+    pagesize
+# A value that the control stack can choose.
+    guest_handle
+# Xen command line.
+    commandline
 }