diff mbox series

[v3,4/9] xen: add basic hypervisor filesystem support

Message ID 20200121084330.18309-5-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series Add hypervisor sysfs-like support | expand

Commit Message

Jürgen Groß Jan. 21, 2020, 8:43 a.m. UTC
Add the infrastructure for the hypervisor filesystem.

This includes the hypercall interface and the base functions for
entry creation, deletion and modification.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V1:
- rename files from filesystem.* to hypfs.*
- add dummy write entry support
- rename hypercall filesystem_op to hypfs_op
- add support for unsigned integer entries

V2:
- test new entry name to be valid

V3:
- major rework, especially by supporting binary contents of entries
- addressed all comments
---
 xen/arch/arm/traps.c         |   1 +
 xen/arch/x86/hvm/hypercall.c |   1 +
 xen/arch/x86/hypercall.c     |   1 +
 xen/arch/x86/pv/hypercall.c  |   1 +
 xen/common/Makefile          |   1 +
 xen/common/hypfs.c           | 365 +++++++++++++++++++++++++++++++++++++++++++
 xen/include/public/hypfs.h   | 124 +++++++++++++++
 xen/include/public/xen.h     |   1 +
 xen/include/xen/hypercall.h  |   8 +
 xen/include/xen/hypfs.h      |  89 +++++++++++
 10 files changed, 592 insertions(+)
 create mode 100644 xen/common/hypfs.c
 create mode 100644 xen/include/public/hypfs.h
 create mode 100644 xen/include/xen/hypfs.h

Comments

Wei Liu Jan. 31, 2020, 3:50 p.m. UTC | #1
On Tue, Jan 21, 2020 at 09:43:25AM +0100, Juergen Gross wrote:
[...]
> diff --git a/xen/common/hypfs.c b/xen/common/hypfs.c
> new file mode 100644
> index 0000000000..6762d20dfd
> --- /dev/null
> +++ b/xen/common/hypfs.c
> @@ -0,0 +1,365 @@
> +/******************************************************************************
> + *
> + * hypfs.c
> + *
> + * Simple sysfs-like file system for the hypervisor.
> + */
> +
> +#include <xen/lib.h>

This should come after hypfs.h.

If it has come first it probably means one of the headers below hasn't
included it properly?

> +#include <xen/err.h>
> +#include <xen/guest_access.h>
> +#include <xen/hypercall.h>
> +#include <xen/hypfs.h>
> +#include <xen/rwlock.h>
> +#include <public/hypfs.h>
Jürgen Groß Feb. 3, 2020, 9:12 a.m. UTC | #2
On 31.01.20 16:50, Wei Liu wrote:
> On Tue, Jan 21, 2020 at 09:43:25AM +0100, Juergen Gross wrote:
> [...]
>> diff --git a/xen/common/hypfs.c b/xen/common/hypfs.c
>> new file mode 100644
>> index 0000000000..6762d20dfd
>> --- /dev/null
>> +++ b/xen/common/hypfs.c
>> @@ -0,0 +1,365 @@
>> +/******************************************************************************
>> + *
>> + * hypfs.c
>> + *
>> + * Simple sysfs-like file system for the hypervisor.
>> + */
>> +
>> +#include <xen/lib.h>
> 
> This should come after hypfs.h.
> 
> If it has come first it probably means one of the headers below hasn't
> included it properly?

I'll move it, as the build will still be fine.


Juergen
Jan Beulich Feb. 3, 2020, 3:07 p.m. UTC | #3
On 21.01.2020 09:43, Juergen Gross wrote:
> ---
>  xen/arch/arm/traps.c         |   1 +
>  xen/arch/x86/hvm/hypercall.c |   1 +
>  xen/arch/x86/hypercall.c     |   1 +
>  xen/arch/x86/pv/hypercall.c  |   1 +
>  xen/common/Makefile          |   1 +
>  xen/common/hypfs.c           | 365 +++++++++++++++++++++++++++++++++++++++++++
>  xen/include/public/hypfs.h   | 124 +++++++++++++++
>  xen/include/public/xen.h     |   1 +
>  xen/include/xen/hypercall.h  |   8 +
>  xen/include/xen/hypfs.h      |  89 +++++++++++
>  10 files changed, 592 insertions(+)

Even if it's just two structures that you have in the public
header, your assertion of the interface being guest bitness
agnostic should be accompanied by actual proof, i.e. addition
to xen/include/xlat.lst.

> +static int add_entry(struct hypfs_entry_dir *parent, struct hypfs_entry *new)
> +{
> +    int ret = -ENOENT;
> +    struct hypfs_entry *e;
> +
> +    write_lock(&hypfs_lock);
> +
> +    list_for_each_entry ( e, &parent->dirlist, list )
> +    {
> +        int cmp = strcmp(e->name, new->name);
> +
> +        if ( cmp > 0 )
> +        {
> +            ret = 0;
> +            list_add_tail(&new->list, &e->list);
> +            break;
> +        }
> +        if ( cmp == 0 )
> +        {
> +            ret = -EEXIST;
> +            break;
> +        }
> +    }
> +
> +    if ( ret == -ENOENT )
> +    {
> +        ret = 0;
> +        list_add_tail(&new->list, &parent->dirlist);
> +    }
> +
> +    if ( !ret )
> +    {
> +        unsigned int sz = strlen(new->name) + 1;
> +
> +        parent->e.size += DIRENTRY_SIZE(sz);

Would DIRENTRY_SIZE() perhaps better include the "+ 1"?

> +int hypfs_add_entry(struct hypfs_entry_dir *parent,
> +                    struct hypfs_entry *entry, bool nofault)
> +{
> +    int ret;
> +
> +    ret = add_entry(parent, entry);
> +    BUG_ON(nofault && ret);
> +
> +    return ret;
> +}

While this and its two siblings have no caller, the one above
doesn't even have a declaration in the header file. What is
this intended to be used for (from external callers)?

I also think the "nofault" aspect could do with discussing in
the commit message - it seems quite odd to me.

> +static int hypfs_get_path_user(char *buf, XEN_GUEST_HANDLE_PARAM(void) uaddr,
> +                               unsigned long len)

For consistency with naming elsewhere as well as uaddr here -
ulen?

> +static struct hypfs_entry *hypfs_get_entry_rel(struct hypfs_entry_dir *dir,
> +                                               const char *path)
> +{
> +    const char *end;
> +    struct hypfs_entry *entry;
> +    unsigned int name_len;
> +
> +    if ( !*path )
> +        return &dir->e;
> +
> +    if ( dir->e.type != XEN_HYPFS_TYPE_DIR )
> +        return NULL;

Don't these two need switching around, to make sure /a/b/c/
doesn't match a non-dir /a/b/c ?

> +    end = strchr(path, '/');
> +    if ( !end )
> +        end = strchr(path, '\0');
> +    name_len = end - path;
> +
> +    list_for_each_entry ( entry, &dir->dirlist, list )
> +    {
> +        int cmp = strncmp(path, entry->name, name_len);
> +	struct hypfs_entry_dir *d = container_of(entry,

A hard tab slipped in here.

> +                                                 struct hypfs_entry_dir, e);
> +
> +        if ( cmp < 0 )
> +            return NULL;
> +        if ( !cmp && strlen(entry->name) == name_len )
> +            return *end ? hypfs_get_entry_rel(d, end + 1) : entry;
> +    }
> +
> +    return NULL;
> +}
> +
> +struct hypfs_entry *hypfs_get_entry(const char *path)
> +{
> +    if ( path[0] != '/' )
> +        return NULL;
> +
> +    return hypfs_get_entry_rel(&hypfs_root, path + 1);
> +}
> +
> +int hypfs_read_dir(const struct hypfs_entry *entry,
> +                   XEN_GUEST_HANDLE_PARAM(void) uaddr)
> +{
> +    const struct hypfs_entry_dir *d;
> +    struct hypfs_entry *e;

const?

> +static int hypfs_read(const struct hypfs_entry *entry,
> +                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
> +{
> +    struct xen_hypfs_direntry e;
> +    long ret = -EINVAL;
> +
> +    if ( ulen < sizeof(e) )
> +        goto out;
> +
> +    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
> +    e.type = entry->type;
> +    e.encoding = entry->encoding;
> +    e.content_len = entry->size;
> +
> +    ret = -EFAULT;
> +    if ( copy_to_guest(uaddr, &e, 1) )
> +        goto out;
> +
> +    ret = 0;
> +    if ( ulen < entry->size + sizeof(e) )
> +        goto out;

So you return "success" even if the operation didn't complete
successfully. This isn't very nice, plus ...

> +    guest_handle_add_offset(uaddr, sizeof(e));
> +
> +    ret = entry->read(entry, uaddr);

... how is the caller to know whether direntry was at least
copied if this then fails?

> +int hypfs_write_leaf(struct hypfs_entry_leaf *leaf,
> +                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
> +{
> +    char *buf;
> +    int ret;
> +
> +    if ( ulen > leaf->e.size )
> +        ulen = leaf->e.size;

Silent truncation?

> +    buf = xzalloc_array(char, ulen);

Why the z variant?

> +    if ( !buf )
> +        return -ENOMEM;
> +
> +    ret = -EFAULT;
> +    if ( copy_from_guest(buf, uaddr, ulen) )
> +        goto out;
> +
> +    ret = 0;
> +    if ( leaf->e.type == XEN_HYPFS_TYPE_STRING )
> +        buf[leaf->e.size - 1] = 0;

And possible further silent truncation? And if the incoming
buffer has no nul byte at ulen-1, you'll then ...

> +    memcpy(leaf->write_ptr, buf, ulen);

... produce a strange concatenation of new and tail of old
contents?

Anyway, this and ...

> + out:
> +    xfree(buf);
> +    return ret;
> +}
> +
> +int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
> +                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
> +{

... this function aren't very helpful to review without there
being a caller. Could these be introduced at the time a first
caller appears?

> +    union {
> +        char buf[8];
> +        uint8_t u8;
> +        uint16_t u16;
> +        uint32_t u32;
> +        uint64_t u64;
> +    } u;
> +
> +    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
> +
> +    if ( ulen != leaf->e.size )
> +        return -EDOM;

Is this restriction really necessary? Setting e.g. a 4-byte
field from 1-byte input is no problem at all. This being for
booleans I anyway wonder why input might be helpful to have
larger than a single byte. But maybe all of this is again a
result of not seeing what a user of the function would look
like.

> +long do_hypfs_op(unsigned int cmd,
> +                 XEN_GUEST_HANDLE_PARAM(void) arg1, unsigned long arg2,
> +                 XEN_GUEST_HANDLE_PARAM(void) arg3, unsigned long arg4)
> +{
> +    int ret;
> +    struct hypfs_entry *entry;
> +    static char path[XEN_HYPFS_MAX_PATHLEN];
> +
> +    if ( !is_control_domain(current->domain) &&
> +         !is_hardware_domain(current->domain) )
> +        return -EPERM;
> +
> +    if ( cmd == XEN_HYPFS_OP_get_version )
> +        return XEN_HYPFS_VERSION;
> +
> +    if ( cmd == XEN_HYPFS_OP_write_contents )
> +        write_lock(&hypfs_lock);
> +    else
> +        read_lock(&hypfs_lock);
> +
> +    ret = hypfs_get_path_user(path, arg1, arg2);
> +    if ( ret )
> +        goto out;
> +
> +    entry = hypfs_get_entry(path);
> +    if ( !entry )
> +    {
> +        ret = -ENOENT;
> +        goto out;
> +    }
> +
> +    switch ( cmd )
> +    {
> +    case XEN_HYPFS_OP_read_contents:
> +        ret = hypfs_read(entry, arg3, arg4);
> +        break;
> +
> +    case XEN_HYPFS_OP_write_contents:
> +        ret = hypfs_write(entry, arg3, arg4);
> +        break;
> +
> +    default:
> +        ret = -ENOSYS;

EINVAL or EOPNOTSUPP please. ENOSYS should be used only for not
implemented top level functions.

> --- /dev/null
> +++ b/xen/include/public/hypfs.h
> @@ -0,0 +1,124 @@
> +/******************************************************************************
> + * Xen Hypervisor Filesystem
> + *
> + * Copyright (c) 2019, SUSE Software Solutions Germany GmbH
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __XEN_PUBLIC_HYPFS_H__
> +#define __XEN_PUBLIC_HYPFS_H__
> +
> +#include "xen.h"
> +
> +/*
> + * Definitions for the __HYPERVISOR_hypfs_op hypercall.
> + */
> +
> +/* Highest version number of the hypfs interface currently defined. */
> +#define XEN_HYPFS_VERSION      1

For this and the accompanying XEN_HYPFS_OP_get_version, at least
the doc added by patch 3 could actually do with mentioning the
intentions you have with this.

> +/* Maximum length of a path in the filesystem. */
> +#define XEN_HYPFS_MAX_PATHLEN 1024
> +
> +struct xen_hypfs_direntry {
> +    uint16_t flags;
> +#define XEN_HYPFS_WRITEABLE    0x0001
> +    uint8_t type;
> +#define XEN_HYPFS_TYPE_DIR     0x0000
> +#define XEN_HYPFS_TYPE_BLOB    0x0001
> +#define XEN_HYPFS_TYPE_STRING  0x0002
> +#define XEN_HYPFS_TYPE_UINT    0x0003
> +#define XEN_HYPFS_TYPE_INT     0x0004
> +#define XEN_HYPFS_TYPE_BOOL    0x0005
> +    uint8_t encoding;
> +#define XEN_HYPFS_ENC_PLAIN    0x0000
> +#define XEN_HYPFS_ENC_GZIP     0x0001

Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
If this is just for "blob", why have separate fields instead of
e.g. BLOB_RAW and BLOB_GZIP or some such?

Also - why 4 digits in the constants when the fields are uint8_t?
Since these are enum-like, I even wonder if hex is warranted
here.

> +    uint32_t content_len;
> +};
> +
> +struct xen_hypfs_dirlistentry {
> +    struct xen_hypfs_direntry e;
> +    /* Offset in bytes to next entry (0 == this is the last entry). */
> +    uint16_t off_next;
> +    char name[XEN_FLEX_ARRAY_DIM];
> +};

The interaction of the last two fields may want spelling out:
I _assume_ name[] is nul-terminated, and off_next exists to
potentially skip trailing padding?

> +/*
> + * Hypercall operations.
> + */
> +
> +/*
> + * XEN_HYPFS_OP_get_version
> + *
> + * Read highest interface version supported by the hypervisor.
> + *
> + * Possible return values:
> + * >0: highest supported interface version
> + * <0: negative Xen errno value
> + */
> +#define XEN_HYPFS_OP_get_version     0
> +
> +/*
> + * XEN_HYPFS_OP_read_contents
> + *
> + * Read contents of a filesystem entry.
> + *
> + * Returns the direntry and contents of an entry in the buffer supplied by the
> + * caller (struct xen_hypfs_direntry with the contents following directly
> + * after it).
> + * The data buffer must be at least the size of the direntry returned in order
> + * to have success. If the data buffer was not large enough for all the data
> + * no entry data is returned, but the direntry will contain the needed size
> + * for the returned data.
> + * The format of the contents is according to its entry type and encoding.

What specifically this means for a dir would be nice to be spelled
out.

> + * arg1: XEN_GUEST_HANDLE(path name)
> + * arg2: length of path name (including trailing zero byte)
> + * arg3: XEN_GUEST_HANDLE(data buffer written by hypervisor)
> + * arg4: data buffer size
> + *
> + * Possible return values:
> + * 0: success (at least the direntry was returned)
> + * <0 : negative Xen errno value
> + */
> +#define XEN_HYPFS_OP_read_contents     1
> +
> +/*
> + * XEN_HYPFS_OP_write_contents
> + *
> + * Write contents of a filesystem entry.
> + *
> + * Writes an entry with the contents of a buffer supplied by the caller.
> + * The data type and encoding can't be changed. The size can be changed only
> + * for blobs and strings.
> + *
> + * arg1: XEN_GUEST_HANDLE(path name)
> + * arg2: length of path name (including trailing zero byte)
> + * arg3: XEN_GUEST_HANDLE(content buffer read by hypervisor)
> + * arg4: content buffer size
> + *
> + * Possible return values:
> + * 0: success
> + * <0 : negative Xen errno value
> + */
> +#define XEN_HYPFS_OP_write_contents    2

This one indeed accesses only the actual data (contents) of the
referenced entry. XEN_HYPFS_OP_read_contents hands back also
the dir entry. Should the latter then better be named just
XEN_HYPFS_OP_read?

> --- a/xen/include/xen/hypercall.h
> +++ b/xen/include/xen/hypercall.h
> @@ -150,6 +150,14 @@ do_dm_op(
>      unsigned int nr_bufs,
>      XEN_GUEST_HANDLE_PARAM(xen_dm_op_buf_t) bufs);
>  
> +extern long
> +do_hypfs_op(
> +    unsigned int cmd,
> +    XEN_GUEST_HANDLE_PARAM(void) arg1,

Do you anticipate this parameter to be used for other than path
names? If not, perhaps better XEN_GUEST_HANDLE_PARAM(const_char)?

> --- /dev/null
> +++ b/xen/include/xen/hypfs.h
> @@ -0,0 +1,89 @@
> +#ifndef __XEN_HYPFS_H__
> +#define __XEN_HYPFS_H__
> +
> +#include <xen/list.h>
> +#include <xen/string.h>
> +#include <public/hypfs.h>
> +
> +struct hypfs_entry_leaf;
> +
> +struct hypfs_entry {
> +    unsigned short type;
> +    unsigned short encoding;
> +    unsigned int size;
> +    const char *name;
> +    struct list_head list;
> +    int (*read)(const struct hypfs_entry *entry,
> +                XEN_GUEST_HANDLE_PARAM(void) uaddr);
> +    int (*write)(struct hypfs_entry_leaf *leaf,
> +                 XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
> +};
> +
> +struct hypfs_entry_leaf {
> +    struct hypfs_entry e;
> +    union {
> +        const void *content;
> +        void *write_ptr;
> +    };
> +};
> +
> +struct hypfs_entry_dir {
> +    struct hypfs_entry e;
> +    struct list_head dirlist;
> +};
> +
> +#define HYPFS_DIR_INIT(var, nam)                \
> +    struct hypfs_entry_dir var = {              \
> +        .e.type = XEN_HYPFS_TYPE_DIR,           \
> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
> +        .e.name = nam,                          \
> +        .e.size = 0,                            \
> +        .e.list = LIST_HEAD_INIT(var.e.list),   \
> +        .e.read = hypfs_read_dir,               \
> +        .dirlist = LIST_HEAD_INIT(var.dirlist), \
> +    }
> +
> +/* Content and size need to be set via hypfs_string_set(). */
> +#define HYPFS_STRING_INIT(var, nam)             \
> +    struct hypfs_entry_leaf var = {             \
> +        .e.type = XEN_HYPFS_TYPE_STRING,        \
> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
> +        .e.name = nam,                          \
> +        .e.read = hypfs_read_leaf,              \
> +    }
> +
> +static inline void hypfs_string_set(struct hypfs_entry_leaf *leaf,
> +                                    const char *str)
> +{
> +    leaf->content = str;
> +    leaf->e.size = strlen(str) + 1;
> +}
> +
> +#define HYPFS_UINT_INIT(var, nam, uint)         \
> +    struct hypfs_entry_leaf var = {             \
> +        .e.type = XEN_HYPFS_TYPE_UINT,          \
> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
> +        .e.name = nam,                          \
> +        .e.size = sizeof(uint),                 \
> +        .e.read = hypfs_read_leaf,              \
> +        .content = &uint,                       \
> +    }

So you've got such helper macros for dir, string, and uint. Why
not e.g. int and bool?

> +
> +
> +extern struct hypfs_entry_dir hypfs_root;

No double blank lines please.

> +struct hypfs_entry *hypfs_get_entry(const char *path);

Does the only caller really need a non-const return type? Even
hypfs_write() doesn't look to modify what its leaf parameter
points at.

And is there indeed an expectation for this to be used from
outside of the source file it's defined in?

Jan
Jürgen Groß Feb. 4, 2020, 6:43 a.m. UTC | #4
On 03.02.20 16:07, Jan Beulich wrote:
> On 21.01.2020 09:43, Juergen Gross wrote:
>> ---
>>   xen/arch/arm/traps.c         |   1 +
>>   xen/arch/x86/hvm/hypercall.c |   1 +
>>   xen/arch/x86/hypercall.c     |   1 +
>>   xen/arch/x86/pv/hypercall.c  |   1 +
>>   xen/common/Makefile          |   1 +
>>   xen/common/hypfs.c           | 365 +++++++++++++++++++++++++++++++++++++++++++
>>   xen/include/public/hypfs.h   | 124 +++++++++++++++
>>   xen/include/public/xen.h     |   1 +
>>   xen/include/xen/hypercall.h  |   8 +
>>   xen/include/xen/hypfs.h      |  89 +++++++++++
>>   10 files changed, 592 insertions(+)
> 
> Even if it's just two structures that you have in the public
> header, your assertion of the interface being guest bitness
> agnostic should be accompanied by actual proof, i.e. addition
> to xen/include/xlat.lst.

Okay.

> 
>> +static int add_entry(struct hypfs_entry_dir *parent, struct hypfs_entry *new)
>> +{
>> +    int ret = -ENOENT;
>> +    struct hypfs_entry *e;
>> +
>> +    write_lock(&hypfs_lock);
>> +
>> +    list_for_each_entry ( e, &parent->dirlist, list )
>> +    {
>> +        int cmp = strcmp(e->name, new->name);
>> +
>> +        if ( cmp > 0 )
>> +        {
>> +            ret = 0;
>> +            list_add_tail(&new->list, &e->list);
>> +            break;
>> +        }
>> +        if ( cmp == 0 )
>> +        {
>> +            ret = -EEXIST;
>> +            break;
>> +        }
>> +    }
>> +
>> +    if ( ret == -ENOENT )
>> +    {
>> +        ret = 0;
>> +        list_add_tail(&new->list, &parent->dirlist);
>> +    }
>> +
>> +    if ( !ret )
>> +    {
>> +        unsigned int sz = strlen(new->name) + 1;
>> +
>> +        parent->e.size += DIRENTRY_SIZE(sz);
> 
> Would DIRENTRY_SIZE() perhaps better include the "+ 1"?

Good idea.

> 
>> +int hypfs_add_entry(struct hypfs_entry_dir *parent,
>> +                    struct hypfs_entry *entry, bool nofault)
>> +{
>> +    int ret;
>> +
>> +    ret = add_entry(parent, entry);
>> +    BUG_ON(nofault && ret);
>> +
>> +    return ret;
>> +}
> 
> While this and its two siblings have no caller, the one above
> doesn't even have a declaration in the header file. What is
> this intended to be used for (from external callers)?

Oh, this seems to be a leftover from development phase. I'll remove
it.

> I also think the "nofault" aspect could do with discussing in
> the commit message - it seems quite odd to me.

I realized I was repeating the same code pattern multiple times, so
I decidied to have a common function for it. Will add something to the
commit message.

> 
>> +static int hypfs_get_path_user(char *buf, XEN_GUEST_HANDLE_PARAM(void) uaddr,
>> +                               unsigned long len)
> 
> For consistency with naming elsewhere as well as uaddr here -
> ulen?

Okay.

> 
>> +static struct hypfs_entry *hypfs_get_entry_rel(struct hypfs_entry_dir *dir,
>> +                                               const char *path)
>> +{
>> +    const char *end;
>> +    struct hypfs_entry *entry;
>> +    unsigned int name_len;
>> +
>> +    if ( !*path )
>> +        return &dir->e;
>> +
>> +    if ( dir->e.type != XEN_HYPFS_TYPE_DIR )
>> +        return NULL;
> 
> Don't these two need switching around, to make sure /a/b/c/
> doesn't match a non-dir /a/b/c ?

Oh, indeed.

> 
>> +    end = strchr(path, '/');
>> +    if ( !end )
>> +        end = strchr(path, '\0');
>> +    name_len = end - path;
>> +
>> +    list_for_each_entry ( entry, &dir->dirlist, list )
>> +    {
>> +        int cmp = strncmp(path, entry->name, name_len);
>> +	struct hypfs_entry_dir *d = container_of(entry,
> 
> A hard tab slipped in here.

Sorry for that.

> 
>> +                                                 struct hypfs_entry_dir, e);
>> +
>> +        if ( cmp < 0 )
>> +            return NULL;
>> +        if ( !cmp && strlen(entry->name) == name_len )
>> +            return *end ? hypfs_get_entry_rel(d, end + 1) : entry;
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +struct hypfs_entry *hypfs_get_entry(const char *path)
>> +{
>> +    if ( path[0] != '/' )
>> +        return NULL;
>> +
>> +    return hypfs_get_entry_rel(&hypfs_root, path + 1);
>> +}
>> +
>> +int hypfs_read_dir(const struct hypfs_entry *entry,
>> +                   XEN_GUEST_HANDLE_PARAM(void) uaddr)
>> +{
>> +    const struct hypfs_entry_dir *d;
>> +    struct hypfs_entry *e;
> 
> const?

Yes.

> 
>> +static int hypfs_read(const struct hypfs_entry *entry,
>> +                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>> +{
>> +    struct xen_hypfs_direntry e;
>> +    long ret = -EINVAL;
>> +
>> +    if ( ulen < sizeof(e) )
>> +        goto out;
>> +
>> +    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
>> +    e.type = entry->type;
>> +    e.encoding = entry->encoding;
>> +    e.content_len = entry->size;
>> +
>> +    ret = -EFAULT;
>> +    if ( copy_to_guest(uaddr, &e, 1) )
>> +        goto out;
>> +
>> +    ret = 0;
>> +    if ( ulen < entry->size + sizeof(e) )
>> +        goto out;
> 
> So you return "success" even if the operation didn't complete
> successfully. This isn't very nice, plus ...

The direntry contains the needed size. The caller should know the
size he passed to Xen.

> 
>> +    guest_handle_add_offset(uaddr, sizeof(e));
>> +
>> +    ret = entry->read(entry, uaddr);
> 
> ... how is the caller to know whether direntry was at least
> copied if this then fails?

Is this really important? Normally -EFAULT should just not happen. In
case it does I don't think the caller can make real use of the direntry.

> 
>> +int hypfs_write_leaf(struct hypfs_entry_leaf *leaf,
>> +                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>> +{
>> +    char *buf;
>> +    int ret;
>> +
>> +    if ( ulen > leaf->e.size )
>> +        ulen = leaf->e.size;
> 
> Silent truncation?

Hmm, true. I'll modify it to return an error in that case.

>> +    buf = xzalloc_array(char, ulen);
> 
> Why the z variant?

Okay, will use xmalloc().

> 
>> +    if ( !buf )
>> +        return -ENOMEM;
>> +
>> +    ret = -EFAULT;
>> +    if ( copy_from_guest(buf, uaddr, ulen) )
>> +        goto out;
>> +
>> +    ret = 0;
>> +    if ( leaf->e.type == XEN_HYPFS_TYPE_STRING )
>> +        buf[leaf->e.size - 1] = 0;
> 
> And possible further silent truncation? And if the incoming
> buffer has no nul byte at ulen-1, you'll then ...
> 
>> +    memcpy(leaf->write_ptr, buf, ulen);
> 
> ... produce a strange concatenation of new and tail of old
> contents?

Will rework by returning an error if the size is too large or there is
no terminating nul byte.

> 
> Anyway, this and ...
> 
>> + out:
>> +    xfree(buf);
>> +    return ret;
>> +}
>> +
>> +int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>> +                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>> +{
> 
> ... this function aren't very helpful to review without there
> being a caller. Could these be introduced at the time a first
> caller appears?

Of course. Question is where to stop. I wanted to have the basic hypfs
support in one patch. Are you fine with just those two functions being
moved to the runtime parameter patch?

> 
>> +    union {
>> +        char buf[8];
>> +        uint8_t u8;
>> +        uint16_t u16;
>> +        uint32_t u32;
>> +        uint64_t u64;
>> +    } u;
>> +
>> +    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
>> +
>> +    if ( ulen != leaf->e.size )
>> +        return -EDOM;
> 
> Is this restriction really necessary? Setting e.g. a 4-byte
> field from 1-byte input is no problem at all. This being for
> booleans I anyway wonder why input might be helpful to have
> larger than a single byte. But maybe all of this is again a
> result of not seeing what a user of the function would look
> like.

I wanted to have as little functionality as possible in the hypervisor.
It is no problem for the library to pass a properly sized buffer.

Allowing larger variables for booleans is just a consequence of the
hypervisor parameters allowing that.

> 
>> +long do_hypfs_op(unsigned int cmd,
>> +                 XEN_GUEST_HANDLE_PARAM(void) arg1, unsigned long arg2,
>> +                 XEN_GUEST_HANDLE_PARAM(void) arg3, unsigned long arg4)
>> +{
>> +    int ret;
>> +    struct hypfs_entry *entry;
>> +    static char path[XEN_HYPFS_MAX_PATHLEN];
>> +
>> +    if ( !is_control_domain(current->domain) &&
>> +         !is_hardware_domain(current->domain) )
>> +        return -EPERM;
>> +
>> +    if ( cmd == XEN_HYPFS_OP_get_version )
>> +        return XEN_HYPFS_VERSION;
>> +
>> +    if ( cmd == XEN_HYPFS_OP_write_contents )
>> +        write_lock(&hypfs_lock);
>> +    else
>> +        read_lock(&hypfs_lock);
>> +
>> +    ret = hypfs_get_path_user(path, arg1, arg2);
>> +    if ( ret )
>> +        goto out;
>> +
>> +    entry = hypfs_get_entry(path);
>> +    if ( !entry )
>> +    {
>> +        ret = -ENOENT;
>> +        goto out;
>> +    }
>> +
>> +    switch ( cmd )
>> +    {
>> +    case XEN_HYPFS_OP_read_contents:
>> +        ret = hypfs_read(entry, arg3, arg4);
>> +        break;
>> +
>> +    case XEN_HYPFS_OP_write_contents:
>> +        ret = hypfs_write(entry, arg3, arg4);
>> +        break;
>> +
>> +    default:
>> +        ret = -ENOSYS;
> 
> EINVAL or EOPNOTSUPP please. ENOSYS should be used only for not
> implemented top level functions.

Okay, will use EOPNOTSUPP.

> 
>> --- /dev/null
>> +++ b/xen/include/public/hypfs.h
>> @@ -0,0 +1,124 @@
>> +/******************************************************************************
>> + * Xen Hypervisor Filesystem
>> + *
>> + * Copyright (c) 2019, SUSE Software Solutions Germany GmbH
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to
>> + * deal in the Software without restriction, including without limitation the
>> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
>> + * sell copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> + * DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef __XEN_PUBLIC_HYPFS_H__
>> +#define __XEN_PUBLIC_HYPFS_H__
>> +
>> +#include "xen.h"
>> +
>> +/*
>> + * Definitions for the __HYPERVISOR_hypfs_op hypercall.
>> + */
>> +
>> +/* Highest version number of the hypfs interface currently defined. */
>> +#define XEN_HYPFS_VERSION      1
> 
> For this and the accompanying XEN_HYPFS_OP_get_version, at least
> the doc added by patch 3 could actually do with mentioning the
> intentions you have with this.

Okay.

> 
>> +/* Maximum length of a path in the filesystem. */
>> +#define XEN_HYPFS_MAX_PATHLEN 1024
>> +
>> +struct xen_hypfs_direntry {
>> +    uint16_t flags;
>> +#define XEN_HYPFS_WRITEABLE    0x0001
>> +    uint8_t type;
>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>> +#define XEN_HYPFS_TYPE_INT     0x0004
>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>> +    uint8_t encoding;
>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>> +#define XEN_HYPFS_ENC_GZIP     0x0001
> 
> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
> If this is just for "blob", why have separate fields instead of
> e.g. BLOB_RAW and BLOB_GZIP or some such?

gzip-ed string or blob are the primary targets.

Maybe we want to have other encoding s later (Andrew asked for that
possibility when I posted the patch for retrieving the .config file
contents early last year).

> Also - why 4 digits in the constants when the fields are uint8_t?
> Since these are enum-like, I even wonder if hex is warranted
> here.

I'm fine with switching to plain integers.

> 
>> +    uint32_t content_len;
>> +};
>> +
>> +struct xen_hypfs_dirlistentry {
>> +    struct xen_hypfs_direntry e;
>> +    /* Offset in bytes to next entry (0 == this is the last entry). */
>> +    uint16_t off_next;
>> +    char name[XEN_FLEX_ARRAY_DIM];
>> +};
> 
> The interaction of the last two fields may want spelling out:
> I _assume_ name[] is nul-terminated, and off_next exists to
> potentially skip trailing padding?

Yes. Will add a comment.

> 
>> +/*
>> + * Hypercall operations.
>> + */
>> +
>> +/*
>> + * XEN_HYPFS_OP_get_version
>> + *
>> + * Read highest interface version supported by the hypervisor.
>> + *
>> + * Possible return values:
>> + * >0: highest supported interface version
>> + * <0: negative Xen errno value
>> + */
>> +#define XEN_HYPFS_OP_get_version     0
>> +
>> +/*
>> + * XEN_HYPFS_OP_read_contents
>> + *
>> + * Read contents of a filesystem entry.
>> + *
>> + * Returns the direntry and contents of an entry in the buffer supplied by the
>> + * caller (struct xen_hypfs_direntry with the contents following directly
>> + * after it).
>> + * The data buffer must be at least the size of the direntry returned in order
>> + * to have success. If the data buffer was not large enough for all the data
>> + * no entry data is returned, but the direntry will contain the needed size
>> + * for the returned data.
>> + * The format of the contents is according to its entry type and encoding.
> 
> What specifically this means for a dir would be nice to be spelled
> out.

Okay.

> 
>> + * arg1: XEN_GUEST_HANDLE(path name)
>> + * arg2: length of path name (including trailing zero byte)
>> + * arg3: XEN_GUEST_HANDLE(data buffer written by hypervisor)
>> + * arg4: data buffer size
>> + *
>> + * Possible return values:
>> + * 0: success (at least the direntry was returned)
>> + * <0 : negative Xen errno value
>> + */
>> +#define XEN_HYPFS_OP_read_contents     1
>> +
>> +/*
>> + * XEN_HYPFS_OP_write_contents
>> + *
>> + * Write contents of a filesystem entry.
>> + *
>> + * Writes an entry with the contents of a buffer supplied by the caller.
>> + * The data type and encoding can't be changed. The size can be changed only
>> + * for blobs and strings.
>> + *
>> + * arg1: XEN_GUEST_HANDLE(path name)
>> + * arg2: length of path name (including trailing zero byte)
>> + * arg3: XEN_GUEST_HANDLE(content buffer read by hypervisor)
>> + * arg4: content buffer size
>> + *
>> + * Possible return values:
>> + * 0: success
>> + * <0 : negative Xen errno value
>> + */
>> +#define XEN_HYPFS_OP_write_contents    2
> 
> This one indeed accesses only the actual data (contents) of the
> referenced entry. XEN_HYPFS_OP_read_contents hands back also
> the dir entry. Should the latter then better be named just
> XEN_HYPFS_OP_read?

Yes, this might be better.

> 
>> --- a/xen/include/xen/hypercall.h
>> +++ b/xen/include/xen/hypercall.h
>> @@ -150,6 +150,14 @@ do_dm_op(
>>       unsigned int nr_bufs,
>>       XEN_GUEST_HANDLE_PARAM(xen_dm_op_buf_t) bufs);
>>   
>> +extern long
>> +do_hypfs_op(
>> +    unsigned int cmd,
>> +    XEN_GUEST_HANDLE_PARAM(void) arg1,
> 
> Do you anticipate this parameter to be used for other than path
> names? If not, perhaps better XEN_GUEST_HANDLE_PARAM(const_char)?

Yes, I think this can be changed.

> 
>> --- /dev/null
>> +++ b/xen/include/xen/hypfs.h
>> @@ -0,0 +1,89 @@
>> +#ifndef __XEN_HYPFS_H__
>> +#define __XEN_HYPFS_H__
>> +
>> +#include <xen/list.h>
>> +#include <xen/string.h>
>> +#include <public/hypfs.h>
>> +
>> +struct hypfs_entry_leaf;
>> +
>> +struct hypfs_entry {
>> +    unsigned short type;
>> +    unsigned short encoding;
>> +    unsigned int size;
>> +    const char *name;
>> +    struct list_head list;
>> +    int (*read)(const struct hypfs_entry *entry,
>> +                XEN_GUEST_HANDLE_PARAM(void) uaddr);
>> +    int (*write)(struct hypfs_entry_leaf *leaf,
>> +                 XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
>> +};
>> +
>> +struct hypfs_entry_leaf {
>> +    struct hypfs_entry e;
>> +    union {
>> +        const void *content;
>> +        void *write_ptr;
>> +    };
>> +};
>> +
>> +struct hypfs_entry_dir {
>> +    struct hypfs_entry e;
>> +    struct list_head dirlist;
>> +};
>> +
>> +#define HYPFS_DIR_INIT(var, nam)                \
>> +    struct hypfs_entry_dir var = {              \
>> +        .e.type = XEN_HYPFS_TYPE_DIR,           \
>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>> +        .e.name = nam,                          \
>> +        .e.size = 0,                            \
>> +        .e.list = LIST_HEAD_INIT(var.e.list),   \
>> +        .e.read = hypfs_read_dir,               \
>> +        .dirlist = LIST_HEAD_INIT(var.dirlist), \
>> +    }
>> +
>> +/* Content and size need to be set via hypfs_string_set(). */
>> +#define HYPFS_STRING_INIT(var, nam)             \
>> +    struct hypfs_entry_leaf var = {             \
>> +        .e.type = XEN_HYPFS_TYPE_STRING,        \
>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>> +        .e.name = nam,                          \
>> +        .e.read = hypfs_read_leaf,              \
>> +    }
>> +
>> +static inline void hypfs_string_set(struct hypfs_entry_leaf *leaf,
>> +                                    const char *str)
>> +{
>> +    leaf->content = str;
>> +    leaf->e.size = strlen(str) + 1;
>> +}
>> +
>> +#define HYPFS_UINT_INIT(var, nam, uint)         \
>> +    struct hypfs_entry_leaf var = {             \
>> +        .e.type = XEN_HYPFS_TYPE_UINT,          \
>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>> +        .e.name = nam,                          \
>> +        .e.size = sizeof(uint),                 \
>> +        .e.read = hypfs_read_leaf,              \
>> +        .content = &uint,                       \
>> +    }
> 
> So you've got such helper macros for dir, string, and uint. Why
> not e.g. int and bool?

There are no users in my series yet.

> 
>> +
>> +
>> +extern struct hypfs_entry_dir hypfs_root;
> 
> No double blank lines please.

Oh, sorry.

> 
>> +struct hypfs_entry *hypfs_get_entry(const char *path);
> 
> Does the only caller really need a non-const return type? Even
> hypfs_write() doesn't look to modify what its leaf parameter
> points at.

This might change when support for dynamically allocated strings or
blobs is added (I have no plans to do this right now, but its easy
to think about the need).

> 
> And is there indeed an expectation for this to be used from
> outside of the source file it's defined in?

Yes. As soon as support for e.g. per-domain or per-cpupool nodes is
added this will be needed.


Juergen
Jan Beulich Feb. 4, 2020, 8:48 a.m. UTC | #5
On 04.02.2020 07:43, Jürgen Groß wrote:
> On 03.02.20 16:07, Jan Beulich wrote:
>> On 21.01.2020 09:43, Juergen Gross wrote:
>>> +static int hypfs_read(const struct hypfs_entry *entry,
>>> +                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>> +{
>>> +    struct xen_hypfs_direntry e;
>>> +    long ret = -EINVAL;
>>> +
>>> +    if ( ulen < sizeof(e) )
>>> +        goto out;
>>> +
>>> +    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
>>> +    e.type = entry->type;
>>> +    e.encoding = entry->encoding;
>>> +    e.content_len = entry->size;
>>> +
>>> +    ret = -EFAULT;
>>> +    if ( copy_to_guest(uaddr, &e, 1) )
>>> +        goto out;
>>> +
>>> +    ret = 0;
>>> +    if ( ulen < entry->size + sizeof(e) )
>>> +        goto out;
>>
>> So you return "success" even if the operation didn't complete
>> successfully. This isn't very nice, plus ...
> 
> The direntry contains the needed size. The caller should know the
> size he passed to Xen.
> 
>>
>>> +    guest_handle_add_offset(uaddr, sizeof(e));
>>> +
>>> +    ret = entry->read(entry, uaddr);
>>
>> ... how is the caller to know whether direntry was at least
>> copied if this then fails?
> 
> Is this really important? Normally -EFAULT should just not happen. In
> case it does I don't think the caller can make real use of the direntry.

"Important" has various possible meanings. The success/failure
indication to the caller should at least be rational. "If the
data buffer was not large enough for all the data no entry data
is returned, but the direntry will contain the needed size for
the returned data" is fine to be stated in the public header,
but I think this wants to be -ENOBUFS then, not 0 (success).

>> Anyway, this and ...
>>
>>> + out:
>>> +    xfree(buf);
>>> +    return ret;
>>> +}
>>> +
>>> +int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>>> +                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>> +{
>>
>> ... this function aren't very helpful to review without there
>> being a caller. Could these be introduced at the time a first
>> caller appears?
> 
> Of course. Question is where to stop. I wanted to have the basic hypfs
> support in one patch. Are you fine with just those two functions being
> moved to the runtime parameter patch?

Let me put it this way: For anything the patch adds but there's
no usage at all (i.e. not even in a macro, where at least the
usage intentions get sufficiently clarified), the description
should cover for this lack of sufficient context. Therefore I'd
also be fine with the two functions remaining here, as long as
readers (reviewers) can understand the intentions. It might
still be _easier_ for everyone to have them in a later patch.
But then the same still goes for other functions that have no
users here. (The helper macros HYPFS_*_INIT(), otoh, are clear
enough the way they are imo, and hence are fine to remain, plus
they serve as usage explanation for hypfs_read_{leaf,dir}(),
which as it looks would otherwise too be orphaned.)

>>> +    union {
>>> +        char buf[8];
>>> +        uint8_t u8;
>>> +        uint16_t u16;
>>> +        uint32_t u32;
>>> +        uint64_t u64;
>>> +    } u;
>>> +
>>> +    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
>>> +
>>> +    if ( ulen != leaf->e.size )
>>> +        return -EDOM;
>>
>> Is this restriction really necessary? Setting e.g. a 4-byte
>> field from 1-byte input is no problem at all. This being for
>> booleans I anyway wonder why input might be helpful to have
>> larger than a single byte. But maybe all of this is again a
>> result of not seeing what a user of the function would look
>> like.
> 
> I wanted to have as little functionality as possible in the hypervisor.
> It is no problem for the library to pass a properly sized buffer.
> 
> Allowing larger variables for booleans is just a consequence of the
> hypervisor parameters allowing that.

But the caller shouldn't be concerned of the hypervisor
implementation detail of what the chose width is. Over time we
e.g. convert int (along with bool_t) to bool when it's used in
a boolean way. This should not result in the caller needing to
change, despite the width change of the variable.

>>> --- /dev/null
>>> +++ b/xen/include/public/hypfs.h
>>> @@ -0,0 +1,124 @@
>>> +/******************************************************************************
>>> + * Xen Hypervisor Filesystem
>>> + *
>>> + * Copyright (c) 2019, SUSE Software Solutions Germany GmbH
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>>> + * of this software and associated documentation files (the "Software"), to
>>> + * deal in the Software without restriction, including without limitation the
>>> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
>>> + * sell copies of the Software, and to permit persons to whom the Software is
>>> + * furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>>> + * DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#ifndef __XEN_PUBLIC_HYPFS_H__
>>> +#define __XEN_PUBLIC_HYPFS_H__
>>> +
>>> +#include "xen.h"
>>> +
>>> +/*
>>> + * Definitions for the __HYPERVISOR_hypfs_op hypercall.
>>> + */
>>> +
>>> +/* Highest version number of the hypfs interface currently defined. */
>>> +#define XEN_HYPFS_VERSION      1
>>
>> For this and the accompanying XEN_HYPFS_OP_get_version, at least
>> the doc added by patch 3 could actually do with mentioning the
>> intentions you have with this.
> 
> Okay.
> 
>>
>>> +/* Maximum length of a path in the filesystem. */
>>> +#define XEN_HYPFS_MAX_PATHLEN 1024
>>> +
>>> +struct xen_hypfs_direntry {
>>> +    uint16_t flags;
>>> +#define XEN_HYPFS_WRITEABLE    0x0001
>>> +    uint8_t type;
>>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>>> +#define XEN_HYPFS_TYPE_INT     0x0004
>>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>>> +    uint8_t encoding;
>>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>>> +#define XEN_HYPFS_ENC_GZIP     0x0001
>>
>> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
>> If this is just for "blob", why have separate fields instead of
>> e.g. BLOB_RAW and BLOB_GZIP or some such?
> 
> gzip-ed string or blob are the primary targets.
> 
> Maybe we want to have other encoding s later (Andrew asked for that
> possibility when I posted the patch for retrieving the .config file
> contents early last year).

To me it would seem preferable if the contents of a blob
identified itself as to its format. But since this leaves
room for ambiguities I accept that the format needs
specifying. However, to me a gzip-ed string is as good as a
gzip-ed blob, and hence I still think sub-dividing "blob" is
the way to go, with no separate "encoding". Otherwise at the
very least a comment here would need adding to clarify what
combinations are valid / to be expected by callers.

>>> +#define HYPFS_DIR_INIT(var, nam)                \
>>> +    struct hypfs_entry_dir var = {              \
>>> +        .e.type = XEN_HYPFS_TYPE_DIR,           \
>>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>>> +        .e.name = nam,                          \
>>> +        .e.size = 0,                            \
>>> +        .e.list = LIST_HEAD_INIT(var.e.list),   \
>>> +        .e.read = hypfs_read_dir,               \
>>> +        .dirlist = LIST_HEAD_INIT(var.dirlist), \
>>> +    }
>>> +
>>> +/* Content and size need to be set via hypfs_string_set(). */
>>> +#define HYPFS_STRING_INIT(var, nam)             \
>>> +    struct hypfs_entry_leaf var = {             \
>>> +        .e.type = XEN_HYPFS_TYPE_STRING,        \
>>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>>> +        .e.name = nam,                          \
>>> +        .e.read = hypfs_read_leaf,              \
>>> +    }
>>> +
>>> +static inline void hypfs_string_set(struct hypfs_entry_leaf *leaf,
>>> +                                    const char *str)
>>> +{
>>> +    leaf->content = str;
>>> +    leaf->e.size = strlen(str) + 1;
>>> +}
>>> +
>>> +#define HYPFS_UINT_INIT(var, nam, uint)         \
>>> +    struct hypfs_entry_leaf var = {             \
>>> +        .e.type = XEN_HYPFS_TYPE_UINT,          \
>>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>>> +        .e.name = nam,                          \
>>> +        .e.size = sizeof(uint),                 \
>>> +        .e.read = hypfs_read_leaf,              \
>>> +        .content = &uint,                       \
>>> +    }
>>
>> So you've got such helper macros for dir, string, and uint. Why
>> not e.g. int and bool?
> 
> There are no users in my series yet.

Hmm, as per above strictly speaking it is this patch which
matters, not the entire series. Hence I think you either want
to supply a full set of helper macros here, or introduce the
ones actually needed in the patches where they get first used.

>>> +struct hypfs_entry *hypfs_get_entry(const char *path);
>>
>> Does the only caller really need a non-const return type? Even
>> hypfs_write() doesn't look to modify what its leaf parameter
>> points at.
> 
> This might change when support for dynamically allocated strings or
> blobs is added (I have no plans to do this right now, but its easy
> to think about the need).
> 
>>
>> And is there indeed an expectation for this to be used from
>> outside of the source file it's defined in?
> 
> Yes. As soon as support for e.g. per-domain or per-cpupool nodes is
> added this will be needed.

Until then, make the function both static and return ptr-to-const?
Such that when this changes, the need for either can actually be
seen?

Jan
Jürgen Groß Feb. 4, 2020, 9:21 a.m. UTC | #6
On 04.02.20 09:48, Jan Beulich wrote:
> On 04.02.2020 07:43, Jürgen Groß wrote:
>> On 03.02.20 16:07, Jan Beulich wrote:
>>> On 21.01.2020 09:43, Juergen Gross wrote:
>>>> +static int hypfs_read(const struct hypfs_entry *entry,
>>>> +                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>>> +{
>>>> +    struct xen_hypfs_direntry e;
>>>> +    long ret = -EINVAL;
>>>> +
>>>> +    if ( ulen < sizeof(e) )
>>>> +        goto out;
>>>> +
>>>> +    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
>>>> +    e.type = entry->type;
>>>> +    e.encoding = entry->encoding;
>>>> +    e.content_len = entry->size;
>>>> +
>>>> +    ret = -EFAULT;
>>>> +    if ( copy_to_guest(uaddr, &e, 1) )
>>>> +        goto out;
>>>> +
>>>> +    ret = 0;
>>>> +    if ( ulen < entry->size + sizeof(e) )
>>>> +        goto out;
>>>
>>> So you return "success" even if the operation didn't complete
>>> successfully. This isn't very nice, plus ...
>>
>> The direntry contains the needed size. The caller should know the
>> size he passed to Xen.
>>
>>>
>>>> +    guest_handle_add_offset(uaddr, sizeof(e));
>>>> +
>>>> +    ret = entry->read(entry, uaddr);
>>>
>>> ... how is the caller to know whether direntry was at least
>>> copied if this then fails?
>>
>> Is this really important? Normally -EFAULT should just not happen. In
>> case it does I don't think the caller can make real use of the direntry.
> 
> "Important" has various possible meanings. The success/failure
> indication to the caller should at least be rational. "If the
> data buffer was not large enough for all the data no entry data
> is returned, but the direntry will contain the needed size for
> the returned data" is fine to be stated in the public header,
> but I think this wants to be -ENOBUFS then, not 0 (success).

I would be fine with this, but this contradicts your previous demand
not to enumerate the possible failure cases, which would be essential
for this case.

> 
>>> Anyway, this and ...
>>>
>>>> + out:
>>>> +    xfree(buf);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
>>>> +                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>>> +{
>>>
>>> ... this function aren't very helpful to review without there
>>> being a caller. Could these be introduced at the time a first
>>> caller appears?
>>
>> Of course. Question is where to stop. I wanted to have the basic hypfs
>> support in one patch. Are you fine with just those two functions being
>> moved to the runtime parameter patch?
> 
> Let me put it this way: For anything the patch adds but there's
> no usage at all (i.e. not even in a macro, where at least the
> usage intentions get sufficiently clarified), the description
> should cover for this lack of sufficient context. Therefore I'd
> also be fine with the two functions remaining here, as long as
> readers (reviewers) can understand the intentions. It might
> still be _easier_ for everyone to have them in a later patch.
> But then the same still goes for other functions that have no
> users here. (The helper macros HYPFS_*_INIT(), otoh, are clear
> enough the way they are imo, and hence are fine to remain, plus
> they serve as usage explanation for hypfs_read_{leaf,dir}(),
> which as it looks would otherwise too be orphaned.)

Fair enough.

> 
>>>> +    union {
>>>> +        char buf[8];
>>>> +        uint8_t u8;
>>>> +        uint16_t u16;
>>>> +        uint32_t u32;
>>>> +        uint64_t u64;
>>>> +    } u;
>>>> +
>>>> +    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
>>>> +
>>>> +    if ( ulen != leaf->e.size )
>>>> +        return -EDOM;
>>>
>>> Is this restriction really necessary? Setting e.g. a 4-byte
>>> field from 1-byte input is no problem at all. This being for
>>> booleans I anyway wonder why input might be helpful to have
>>> larger than a single byte. But maybe all of this is again a
>>> result of not seeing what a user of the function would look
>>> like.
>>
>> I wanted to have as little functionality as possible in the hypervisor.
>> It is no problem for the library to pass a properly sized buffer.
>>
>> Allowing larger variables for booleans is just a consequence of the
>> hypervisor parameters allowing that.
> 
> But the caller shouldn't be concerned of the hypervisor
> implementation detail of what the chose width is. Over time we
> e.g. convert int (along with bool_t) to bool when it's used in
> a boolean way. This should not result in the caller needing to
> change, despite the width change of the variable.

This is basically a consequence of now passing binary values to and from
the hypervisor.

The normal way of handling this (as can be seen in libxenhypfs) is to
query the hypervisor for the size of the value (no matter whether its
int, uint or bool) and then to do the conversion between ASCII and the
binary value at the caller's side.

> 
>>>> --- /dev/null
>>>> +++ b/xen/include/public/hypfs.h
>>>> @@ -0,0 +1,124 @@
>>>> +/******************************************************************************
>>>> + * Xen Hypervisor Filesystem
>>>> + *
>>>> + * Copyright (c) 2019, SUSE Software Solutions Germany GmbH
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>>>> + * of this software and associated documentation files (the "Software"), to
>>>> + * deal in the Software without restriction, including without limitation the
>>>> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
>>>> + * sell copies of the Software, and to permit persons to whom the Software is
>>>> + * furnished to do so, subject to the following conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>>>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>>>> + * DEALINGS IN THE SOFTWARE.
>>>> + *
>>>> + */
>>>> +
>>>> +#ifndef __XEN_PUBLIC_HYPFS_H__
>>>> +#define __XEN_PUBLIC_HYPFS_H__
>>>> +
>>>> +#include "xen.h"
>>>> +
>>>> +/*
>>>> + * Definitions for the __HYPERVISOR_hypfs_op hypercall.
>>>> + */
>>>> +
>>>> +/* Highest version number of the hypfs interface currently defined. */
>>>> +#define XEN_HYPFS_VERSION      1
>>>
>>> For this and the accompanying XEN_HYPFS_OP_get_version, at least
>>> the doc added by patch 3 could actually do with mentioning the
>>> intentions you have with this.
>>
>> Okay.
>>
>>>
>>>> +/* Maximum length of a path in the filesystem. */
>>>> +#define XEN_HYPFS_MAX_PATHLEN 1024
>>>> +
>>>> +struct xen_hypfs_direntry {
>>>> +    uint16_t flags;
>>>> +#define XEN_HYPFS_WRITEABLE    0x0001
>>>> +    uint8_t type;
>>>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>>>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>>>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>>>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>>>> +#define XEN_HYPFS_TYPE_INT     0x0004
>>>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>>>> +    uint8_t encoding;
>>>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>>>> +#define XEN_HYPFS_ENC_GZIP     0x0001
>>>
>>> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
>>> If this is just for "blob", why have separate fields instead of
>>> e.g. BLOB_RAW and BLOB_GZIP or some such?
>>
>> gzip-ed string or blob are the primary targets.
>>
>> Maybe we want to have other encoding s later (Andrew asked for that
>> possibility when I posted the patch for retrieving the .config file
>> contents early last year).
> 
> To me it would seem preferable if the contents of a blob
> identified itself as to its format. But since this leaves
> room for ambiguities I accept that the format needs
> specifying. However, to me a gzip-ed string is as good as a
> gzip-ed blob, and hence I still think sub-dividing "blob" is
> the way to go, with no separate "encoding". Otherwise at the
> very least a comment here would need adding to clarify what
> combinations are valid / to be expected by callers.

libxenhypfs is able to handle all possible combinations. I just don't
think some of the combinations are making sense (gzip-ing a binary
value of 4 bytes e.g. is nonsense).

OTOH in case we'll add large arrays of longs in the future it might be
beneficial to compress them in some way. So I'd like to keep type and
encoding as separate information.

> 
>>>> +#define HYPFS_DIR_INIT(var, nam)                \
>>>> +    struct hypfs_entry_dir var = {              \
>>>> +        .e.type = XEN_HYPFS_TYPE_DIR,           \
>>>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>>>> +        .e.name = nam,                          \
>>>> +        .e.size = 0,                            \
>>>> +        .e.list = LIST_HEAD_INIT(var.e.list),   \
>>>> +        .e.read = hypfs_read_dir,               \
>>>> +        .dirlist = LIST_HEAD_INIT(var.dirlist), \
>>>> +    }
>>>> +
>>>> +/* Content and size need to be set via hypfs_string_set(). */
>>>> +#define HYPFS_STRING_INIT(var, nam)             \
>>>> +    struct hypfs_entry_leaf var = {             \
>>>> +        .e.type = XEN_HYPFS_TYPE_STRING,        \
>>>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>>>> +        .e.name = nam,                          \
>>>> +        .e.read = hypfs_read_leaf,              \
>>>> +    }
>>>> +
>>>> +static inline void hypfs_string_set(struct hypfs_entry_leaf *leaf,
>>>> +                                    const char *str)
>>>> +{
>>>> +    leaf->content = str;
>>>> +    leaf->e.size = strlen(str) + 1;
>>>> +}
>>>> +
>>>> +#define HYPFS_UINT_INIT(var, nam, uint)         \
>>>> +    struct hypfs_entry_leaf var = {             \
>>>> +        .e.type = XEN_HYPFS_TYPE_UINT,          \
>>>> +        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
>>>> +        .e.name = nam,                          \
>>>> +        .e.size = sizeof(uint),                 \
>>>> +        .e.read = hypfs_read_leaf,              \
>>>> +        .content = &uint,                       \
>>>> +    }
>>>
>>> So you've got such helper macros for dir, string, and uint. Why
>>> not e.g. int and bool?
>>
>> There are no users in my series yet.
> 
> Hmm, as per above strictly speaking it is this patch which
> matters, not the entire series. Hence I think you either want
> to supply a full set of helper macros here, or introduce the
> ones actually needed in the patches where they get first used.

Okay, I'll go with the full set of helpers.

> 
>>>> +struct hypfs_entry *hypfs_get_entry(const char *path);
>>>
>>> Does the only caller really need a non-const return type? Even
>>> hypfs_write() doesn't look to modify what its leaf parameter
>>> points at.
>>
>> This might change when support for dynamically allocated strings or
>> blobs is added (I have no plans to do this right now, but its easy
>> to think about the need).
>>
>>>
>>> And is there indeed an expectation for this to be used from
>>> outside of the source file it's defined in?
>>
>> Yes. As soon as support for e.g. per-domain or per-cpupool nodes is
>> added this will be needed.
> 
> Until then, make the function both static and return ptr-to-const?
> Such that when this changes, the need for either can actually be
> seen?

Fine with me.


Juergen
Jan Beulich Feb. 4, 2020, 9:58 a.m. UTC | #7
On 04.02.2020 10:21, Jürgen Groß wrote:
> On 04.02.20 09:48, Jan Beulich wrote:
>> On 04.02.2020 07:43, Jürgen Groß wrote:
>>> On 03.02.20 16:07, Jan Beulich wrote:
>>>> On 21.01.2020 09:43, Juergen Gross wrote:
>>>>> +static int hypfs_read(const struct hypfs_entry *entry,
>>>>> +                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>>>> +{
>>>>> +    struct xen_hypfs_direntry e;
>>>>> +    long ret = -EINVAL;
>>>>> +
>>>>> +    if ( ulen < sizeof(e) )
>>>>> +        goto out;
>>>>> +
>>>>> +    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
>>>>> +    e.type = entry->type;
>>>>> +    e.encoding = entry->encoding;
>>>>> +    e.content_len = entry->size;
>>>>> +
>>>>> +    ret = -EFAULT;
>>>>> +    if ( copy_to_guest(uaddr, &e, 1) )
>>>>> +        goto out;
>>>>> +
>>>>> +    ret = 0;
>>>>> +    if ( ulen < entry->size + sizeof(e) )
>>>>> +        goto out;
>>>>
>>>> So you return "success" even if the operation didn't complete
>>>> successfully. This isn't very nice, plus ...
>>>
>>> The direntry contains the needed size. The caller should know the
>>> size he passed to Xen.
>>>
>>>>
>>>>> +    guest_handle_add_offset(uaddr, sizeof(e));
>>>>> +
>>>>> +    ret = entry->read(entry, uaddr);
>>>>
>>>> ... how is the caller to know whether direntry was at least
>>>> copied if this then fails?
>>>
>>> Is this really important? Normally -EFAULT should just not happen. In
>>> case it does I don't think the caller can make real use of the direntry.
>>
>> "Important" has various possible meanings. The success/failure
>> indication to the caller should at least be rational. "If the
>> data buffer was not large enough for all the data no entry data
>> is returned, but the direntry will contain the needed size for
>> the returned data" is fine to be stated in the public header,
>> but I think this wants to be -ENOBUFS then, not 0 (success).
> 
> I would be fine with this, but this contradicts your previous demand
> not to enumerate the possible failure cases, which would be essential
> for this case.

Slightly re-writing the part of the comment I did quote would be
all that's needed afaict: "If the data buffer was not large enough
for all the data -ENOBUFS and no entry data is returned, but the
direntry will contain the needed size for the returned data."

>>>>> +    union {
>>>>> +        char buf[8];
>>>>> +        uint8_t u8;
>>>>> +        uint16_t u16;
>>>>> +        uint32_t u32;
>>>>> +        uint64_t u64;
>>>>> +    } u;
>>>>> +
>>>>> +    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
>>>>> +
>>>>> +    if ( ulen != leaf->e.size )
>>>>> +        return -EDOM;
>>>>
>>>> Is this restriction really necessary? Setting e.g. a 4-byte
>>>> field from 1-byte input is no problem at all. This being for
>>>> booleans I anyway wonder why input might be helpful to have
>>>> larger than a single byte. But maybe all of this is again a
>>>> result of not seeing what a user of the function would look
>>>> like.
>>>
>>> I wanted to have as little functionality as possible in the hypervisor.
>>> It is no problem for the library to pass a properly sized buffer.
>>>
>>> Allowing larger variables for booleans is just a consequence of the
>>> hypervisor parameters allowing that.
>>
>> But the caller shouldn't be concerned of the hypervisor
>> implementation detail of what the chose width is. Over time we
>> e.g. convert int (along with bool_t) to bool when it's used in
>> a boolean way. This should not result in the caller needing to
>> change, despite the width change of the variable.
> 
> This is basically a consequence of now passing binary values to and from
> the hypervisor.
> 
> The normal way of handling this (as can be seen in libxenhypfs) is to
> query the hypervisor for the size of the value (no matter whether its
> int, uint or bool) and then to do the conversion between ASCII and the
> binary value at the caller's side.

I can see why this is needed for e.g. integer values, but I don't
see the need for booleans.

>>>>> +struct xen_hypfs_direntry {
>>>>> +    uint16_t flags;
>>>>> +#define XEN_HYPFS_WRITEABLE    0x0001
>>>>> +    uint8_t type;
>>>>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>>>>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>>>>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>>>>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>>>>> +#define XEN_HYPFS_TYPE_INT     0x0004
>>>>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>>>>> +    uint8_t encoding;
>>>>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>>>>> +#define XEN_HYPFS_ENC_GZIP     0x0001
>>>>
>>>> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
>>>> If this is just for "blob", why have separate fields instead of
>>>> e.g. BLOB_RAW and BLOB_GZIP or some such?
>>>
>>> gzip-ed string or blob are the primary targets.
>>>
>>> Maybe we want to have other encoding s later (Andrew asked for that
>>> possibility when I posted the patch for retrieving the .config file
>>> contents early last year).
>>
>> To me it would seem preferable if the contents of a blob
>> identified itself as to its format. But since this leaves
>> room for ambiguities I accept that the format needs
>> specifying. However, to me a gzip-ed string is as good as a
>> gzip-ed blob, and hence I still think sub-dividing "blob" is
>> the way to go, with no separate "encoding". Otherwise at the
>> very least a comment here would need adding to clarify what
>> combinations are valid / to be expected by callers.
> 
> libxenhypfs is able to handle all possible combinations. I just don't
> think some of the combinations are making sense (gzip-ing a binary
> value of 4 bytes e.g. is nonsense).
> 
> OTOH in case we'll add large arrays of longs in the future it might be
> beneficial to compress them in some way. So I'd like to keep type and
> encoding as separate information.

Okay, I'm not entirely opposed. But I'd be curious if anyone
else has an opinion here.

Jan
Jürgen Groß Feb. 4, 2020, 10:48 a.m. UTC | #8
On 04.02.20 10:58, Jan Beulich wrote:
> On 04.02.2020 10:21, Jürgen Groß wrote:
>> On 04.02.20 09:48, Jan Beulich wrote:
>>> On 04.02.2020 07:43, Jürgen Groß wrote:
>>>> On 03.02.20 16:07, Jan Beulich wrote:
>>>>> On 21.01.2020 09:43, Juergen Gross wrote:
>>>>>> +static int hypfs_read(const struct hypfs_entry *entry,
>>>>>> +                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
>>>>>> +{
>>>>>> +    struct xen_hypfs_direntry e;
>>>>>> +    long ret = -EINVAL;
>>>>>> +
>>>>>> +    if ( ulen < sizeof(e) )
>>>>>> +        goto out;
>>>>>> +
>>>>>> +    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
>>>>>> +    e.type = entry->type;
>>>>>> +    e.encoding = entry->encoding;
>>>>>> +    e.content_len = entry->size;
>>>>>> +
>>>>>> +    ret = -EFAULT;
>>>>>> +    if ( copy_to_guest(uaddr, &e, 1) )
>>>>>> +        goto out;
>>>>>> +
>>>>>> +    ret = 0;
>>>>>> +    if ( ulen < entry->size + sizeof(e) )
>>>>>> +        goto out;
>>>>>
>>>>> So you return "success" even if the operation didn't complete
>>>>> successfully. This isn't very nice, plus ...
>>>>
>>>> The direntry contains the needed size. The caller should know the
>>>> size he passed to Xen.
>>>>
>>>>>
>>>>>> +    guest_handle_add_offset(uaddr, sizeof(e));
>>>>>> +
>>>>>> +    ret = entry->read(entry, uaddr);
>>>>>
>>>>> ... how is the caller to know whether direntry was at least
>>>>> copied if this then fails?
>>>>
>>>> Is this really important? Normally -EFAULT should just not happen. In
>>>> case it does I don't think the caller can make real use of the direntry.
>>>
>>> "Important" has various possible meanings. The success/failure
>>> indication to the caller should at least be rational. "If the
>>> data buffer was not large enough for all the data no entry data
>>> is returned, but the direntry will contain the needed size for
>>> the returned data" is fine to be stated in the public header,
>>> but I think this wants to be -ENOBUFS then, not 0 (success).
>>
>> I would be fine with this, but this contradicts your previous demand
>> not to enumerate the possible failure cases, which would be essential
>> for this case.
> 
> Slightly re-writing the part of the comment I did quote would be
> all that's needed afaict: "If the data buffer was not large enough
> for all the data -ENOBUFS and no entry data is returned, but the
> direntry will contain the needed size for the returned data."

Okay. Fine with me.

> 
>>>>>> +    union {
>>>>>> +        char buf[8];
>>>>>> +        uint8_t u8;
>>>>>> +        uint16_t u16;
>>>>>> +        uint32_t u32;
>>>>>> +        uint64_t u64;
>>>>>> +    } u;
>>>>>> +
>>>>>> +    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
>>>>>> +
>>>>>> +    if ( ulen != leaf->e.size )
>>>>>> +        return -EDOM;
>>>>>
>>>>> Is this restriction really necessary? Setting e.g. a 4-byte
>>>>> field from 1-byte input is no problem at all. This being for
>>>>> booleans I anyway wonder why input might be helpful to have
>>>>> larger than a single byte. But maybe all of this is again a
>>>>> result of not seeing what a user of the function would look
>>>>> like.
>>>>
>>>> I wanted to have as little functionality as possible in the hypervisor.
>>>> It is no problem for the library to pass a properly sized buffer.
>>>>
>>>> Allowing larger variables for booleans is just a consequence of the
>>>> hypervisor parameters allowing that.
>>>
>>> But the caller shouldn't be concerned of the hypervisor
>>> implementation detail of what the chose width is. Over time we
>>> e.g. convert int (along with bool_t) to bool when it's used in
>>> a boolean way. This should not result in the caller needing to
>>> change, despite the width change of the variable.
>>
>> This is basically a consequence of now passing binary values to and from
>> the hypervisor.
>>
>> The normal way of handling this (as can be seen in libxenhypfs) is to
>> query the hypervisor for the size of the value (no matter whether its
>> int, uint or bool) and then to do the conversion between ASCII and the
>> binary value at the caller's side.
> 
> I can see why this is needed for e.g. integer values, but I don't
> see the need for booleans.

At some level the conversion needs to be done. I'd rather do it for all
types at the same level.

> 
>>>>>> +struct xen_hypfs_direntry {
>>>>>> +    uint16_t flags;
>>>>>> +#define XEN_HYPFS_WRITEABLE    0x0001
>>>>>> +    uint8_t type;
>>>>>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>>>>>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>>>>>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>>>>>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>>>>>> +#define XEN_HYPFS_TYPE_INT     0x0004
>>>>>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>>>>>> +    uint8_t encoding;
>>>>>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>>>>>> +#define XEN_HYPFS_ENC_GZIP     0x0001
>>>>>
>>>>> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
>>>>> If this is just for "blob", why have separate fields instead of
>>>>> e.g. BLOB_RAW and BLOB_GZIP or some such?
>>>>
>>>> gzip-ed string or blob are the primary targets.
>>>>
>>>> Maybe we want to have other encoding s later (Andrew asked for that
>>>> possibility when I posted the patch for retrieving the .config file
>>>> contents early last year).
>>>
>>> To me it would seem preferable if the contents of a blob
>>> identified itself as to its format. But since this leaves
>>> room for ambiguities I accept that the format needs
>>> specifying. However, to me a gzip-ed string is as good as a
>>> gzip-ed blob, and hence I still think sub-dividing "blob" is
>>> the way to go, with no separate "encoding". Otherwise at the
>>> very least a comment here would need adding to clarify what
>>> combinations are valid / to be expected by callers.
>>
>> libxenhypfs is able to handle all possible combinations. I just don't
>> think some of the combinations are making sense (gzip-ing a binary
>> value of 4 bytes e.g. is nonsense).
>>
>> OTOH in case we'll add large arrays of longs in the future it might be
>> beneficial to compress them in some way. So I'd like to keep type and
>> encoding as separate information.
> 
> Okay, I'm not entirely opposed. But I'd be curious if anyone
> else has an opinion here.

I think content type and transport encoding should not be mixed up. They
are orthogonal to each other and so they should be handled.


Juergen
Jan Beulich Feb. 4, 2020, 11:28 a.m. UTC | #9
On 04.02.2020 11:48, Jürgen Groß wrote:
> On 04.02.20 10:58, Jan Beulich wrote:
>> On 04.02.2020 10:21, Jürgen Groß wrote:
>>> On 04.02.20 09:48, Jan Beulich wrote:
>>>> On 04.02.2020 07:43, Jürgen Groß wrote:
>>>>> On 03.02.20 16:07, Jan Beulich wrote:
>>>>>> On 21.01.2020 09:43, Juergen Gross wrote:
>>>>>>> +struct xen_hypfs_direntry {
>>>>>>> +    uint16_t flags;
>>>>>>> +#define XEN_HYPFS_WRITEABLE    0x0001
>>>>>>> +    uint8_t type;
>>>>>>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>>>>>>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>>>>>>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>>>>>>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>>>>>>> +#define XEN_HYPFS_TYPE_INT     0x0004
>>>>>>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>>>>>>> +    uint8_t encoding;
>>>>>>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>>>>>>> +#define XEN_HYPFS_ENC_GZIP     0x0001
>>>>>>
>>>>>> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
>>>>>> If this is just for "blob", why have separate fields instead of
>>>>>> e.g. BLOB_RAW and BLOB_GZIP or some such?
>>>>>
>>>>> gzip-ed string or blob are the primary targets.
>>>>>
>>>>> Maybe we want to have other encoding s later (Andrew asked for that
>>>>> possibility when I posted the patch for retrieving the .config file
>>>>> contents early last year).
>>>>
>>>> To me it would seem preferable if the contents of a blob
>>>> identified itself as to its format. But since this leaves
>>>> room for ambiguities I accept that the format needs
>>>> specifying. However, to me a gzip-ed string is as good as a
>>>> gzip-ed blob, and hence I still think sub-dividing "blob" is
>>>> the way to go, with no separate "encoding". Otherwise at the
>>>> very least a comment here would need adding to clarify what
>>>> combinations are valid / to be expected by callers.
>>>
>>> libxenhypfs is able to handle all possible combinations. I just don't
>>> think some of the combinations are making sense (gzip-ing a binary
>>> value of 4 bytes e.g. is nonsense).
>>>
>>> OTOH in case we'll add large arrays of longs in the future it might be
>>> beneficial to compress them in some way. So I'd like to keep type and
>>> encoding as separate information.
>>
>> Okay, I'm not entirely opposed. But I'd be curious if anyone
>> else has an opinion here.
> 
> I think content type and transport encoding should not be mixed up. They
> are orthogonal to each other and so they should be handled.

In principle I agree, but "blob" really covers anything or nothing
at all. Yes, if strings are meant to be possible to be gzip-ed,
then there is value in the separation. I'm not fully convinced
though that such compressed strings (Are you thinking about
.config here?) shouldn't simply be "blob" then, too.

Jan
Jürgen Groß Feb. 4, 2020, 11:38 a.m. UTC | #10
On 04.02.20 12:28, Jan Beulich wrote:
> On 04.02.2020 11:48, Jürgen Groß wrote:
>> On 04.02.20 10:58, Jan Beulich wrote:
>>> On 04.02.2020 10:21, Jürgen Groß wrote:
>>>> On 04.02.20 09:48, Jan Beulich wrote:
>>>>> On 04.02.2020 07:43, Jürgen Groß wrote:
>>>>>> On 03.02.20 16:07, Jan Beulich wrote:
>>>>>>> On 21.01.2020 09:43, Juergen Gross wrote:
>>>>>>>> +struct xen_hypfs_direntry {
>>>>>>>> +    uint16_t flags;
>>>>>>>> +#define XEN_HYPFS_WRITEABLE    0x0001
>>>>>>>> +    uint8_t type;
>>>>>>>> +#define XEN_HYPFS_TYPE_DIR     0x0000
>>>>>>>> +#define XEN_HYPFS_TYPE_BLOB    0x0001
>>>>>>>> +#define XEN_HYPFS_TYPE_STRING  0x0002
>>>>>>>> +#define XEN_HYPFS_TYPE_UINT    0x0003
>>>>>>>> +#define XEN_HYPFS_TYPE_INT     0x0004
>>>>>>>> +#define XEN_HYPFS_TYPE_BOOL    0x0005
>>>>>>>> +    uint8_t encoding;
>>>>>>>> +#define XEN_HYPFS_ENC_PLAIN    0x0000
>>>>>>>> +#define XEN_HYPFS_ENC_GZIP     0x0001
>>>>>>>
>>>>>>> Meaning I can e.g. have a gzip-ed string or bool (or even dir)?
>>>>>>> If this is just for "blob", why have separate fields instead of
>>>>>>> e.g. BLOB_RAW and BLOB_GZIP or some such?
>>>>>>
>>>>>> gzip-ed string or blob are the primary targets.
>>>>>>
>>>>>> Maybe we want to have other encoding s later (Andrew asked for that
>>>>>> possibility when I posted the patch for retrieving the .config file
>>>>>> contents early last year).
>>>>>
>>>>> To me it would seem preferable if the contents of a blob
>>>>> identified itself as to its format. But since this leaves
>>>>> room for ambiguities I accept that the format needs
>>>>> specifying. However, to me a gzip-ed string is as good as a
>>>>> gzip-ed blob, and hence I still think sub-dividing "blob" is
>>>>> the way to go, with no separate "encoding". Otherwise at the
>>>>> very least a comment here would need adding to clarify what
>>>>> combinations are valid / to be expected by callers.
>>>>
>>>> libxenhypfs is able to handle all possible combinations. I just don't
>>>> think some of the combinations are making sense (gzip-ing a binary
>>>> value of 4 bytes e.g. is nonsense).
>>>>
>>>> OTOH in case we'll add large arrays of longs in the future it might be
>>>> beneficial to compress them in some way. So I'd like to keep type and
>>>> encoding as separate information.
>>>
>>> Okay, I'm not entirely opposed. But I'd be curious if anyone
>>> else has an opinion here.
>>
>> I think content type and transport encoding should not be mixed up. They
>> are orthogonal to each other and so they should be handled.
> 
> In principle I agree, but "blob" really covers anything or nothing
> at all. Yes, if strings are meant to be possible to be gzip-ed,
> then there is value in the separation. I'm not fully convinced
> though that such compressed strings (Are you thinking about
> .config here?) shouldn't simply be "blob" then, too.

With a library on top of the hypercall it is easy to hide the encoding
from the standard user. So even with .config being held in gzip-ed
format in the hypervisor the xenhypfs tool will still just print the
textual form of it when reading the associated node. This is different
from sysfs or procfs of the Linux kernel, where the raw data is
presented at the primary user interface. Additionally this enables us
to avoid having to specify the compression format as a stable ABI. We
could at any time switch to uncompressed format without problem.


Juergen
diff mbox series

Patch

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 6f9bec22d3..87af810667 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1382,6 +1382,7 @@  static arm_hypercall_t arm_hypercall_table[] = {
 #ifdef CONFIG_ARGO
     HYPERCALL(argo_op, 5),
 #endif
+    HYPERCALL(hypfs_op, 5),
 };
 
 #ifndef NDEBUG
diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
index 33dd2d99d2..210dda4f38 100644
--- a/xen/arch/x86/hvm/hypercall.c
+++ b/xen/arch/x86/hvm/hypercall.c
@@ -144,6 +144,7 @@  static const hypercall_table_t hvm_hypercall_table[] = {
 #endif
     HYPERCALL(xenpmu_op),
     COMPAT_CALL(dm_op),
+    HYPERCALL(hypfs_op),
     HYPERCALL(arch_1)
 };
 
diff --git a/xen/arch/x86/hypercall.c b/xen/arch/x86/hypercall.c
index 7f299d45c6..05a3f5e25b 100644
--- a/xen/arch/x86/hypercall.c
+++ b/xen/arch/x86/hypercall.c
@@ -73,6 +73,7 @@  const hypercall_args_t hypercall_args_table[NR_hypercalls] =
     ARGS(hvm_op, 2),
     ARGS(dm_op, 3),
 #endif
+    ARGS(hypfs_op, 5),
     ARGS(mca, 1),
     ARGS(arch_1, 1),
 };
diff --git a/xen/arch/x86/pv/hypercall.c b/xen/arch/x86/pv/hypercall.c
index 17ddf9ea1f..83907d4f00 100644
--- a/xen/arch/x86/pv/hypercall.c
+++ b/xen/arch/x86/pv/hypercall.c
@@ -85,6 +85,7 @@  const hypercall_table_t pv_hypercall_table[] = {
     HYPERCALL(hvm_op),
     COMPAT_CALL(dm_op),
 #endif
+    HYPERCALL(hypfs_op),
     HYPERCALL(mca),
     HYPERCALL(arch_1),
 };
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 62b34e69e9..a3f66aa0c0 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -11,6 +11,7 @@  obj-y += domain.o
 obj-y += event_2l.o
 obj-y += event_channel.o
 obj-y += event_fifo.o
+obj-y += hypfs.o
 obj-$(CONFIG_CRASH_DEBUG) += gdbstub.o
 obj-$(CONFIG_GRANT_TABLE) += grant_table.o
 obj-y += guestcopy.o
diff --git a/xen/common/hypfs.c b/xen/common/hypfs.c
new file mode 100644
index 0000000000..6762d20dfd
--- /dev/null
+++ b/xen/common/hypfs.c
@@ -0,0 +1,365 @@ 
+/******************************************************************************
+ *
+ * hypfs.c
+ *
+ * Simple sysfs-like file system for the hypervisor.
+ */
+
+#include <xen/lib.h>
+#include <xen/err.h>
+#include <xen/guest_access.h>
+#include <xen/hypercall.h>
+#include <xen/hypfs.h>
+#include <xen/rwlock.h>
+#include <public/hypfs.h>
+
+#define DIRENTRY_NAME_OFF offsetof(struct xen_hypfs_dirlistentry, name)
+#define DIRENTRY_SIZE(name_len) \
+    (DIRENTRY_NAME_OFF + ROUNDUP(name_len, alignof(struct xen_hypfs_direntry)))
+
+static DEFINE_RWLOCK(hypfs_lock);
+
+HYPFS_DIR_INIT(hypfs_root, "");
+
+static int add_entry(struct hypfs_entry_dir *parent, struct hypfs_entry *new)
+{
+    int ret = -ENOENT;
+    struct hypfs_entry *e;
+
+    write_lock(&hypfs_lock);
+
+    list_for_each_entry ( e, &parent->dirlist, list )
+    {
+        int cmp = strcmp(e->name, new->name);
+
+        if ( cmp > 0 )
+        {
+            ret = 0;
+            list_add_tail(&new->list, &e->list);
+            break;
+        }
+        if ( cmp == 0 )
+        {
+            ret = -EEXIST;
+            break;
+        }
+    }
+
+    if ( ret == -ENOENT )
+    {
+        ret = 0;
+        list_add_tail(&new->list, &parent->dirlist);
+    }
+
+    if ( !ret )
+    {
+        unsigned int sz = strlen(new->name) + 1;
+
+        parent->e.size += DIRENTRY_SIZE(sz);
+    }
+
+    write_unlock(&hypfs_lock);
+
+    return ret;
+}
+
+int hypfs_add_entry(struct hypfs_entry_dir *parent,
+                    struct hypfs_entry *entry, bool nofault)
+{
+    int ret;
+
+    ret = add_entry(parent, entry);
+    BUG_ON(nofault && ret);
+
+    return ret;
+}
+
+int hypfs_add_dir(struct hypfs_entry_dir *parent,
+                  struct hypfs_entry_dir *dir, bool nofault)
+{
+    int ret;
+
+    ret = add_entry(parent, &dir->e);
+    BUG_ON(nofault && ret);
+
+    return ret;
+}
+
+int hypfs_add_leaf(struct hypfs_entry_dir *parent,
+                   struct hypfs_entry_leaf *leaf, bool nofault)
+{
+    int ret;
+
+    if ( !leaf->content )
+        ret = -EINVAL;
+    else
+        ret = add_entry(parent, &leaf->e);
+    BUG_ON(nofault && ret);
+
+    return ret;
+}
+
+static int hypfs_get_path_user(char *buf, XEN_GUEST_HANDLE_PARAM(void) uaddr,
+                               unsigned long len)
+{
+    if ( len > XEN_HYPFS_MAX_PATHLEN )
+        return -EINVAL;
+
+    if ( copy_from_guest(buf, uaddr, len) )
+        return -EFAULT;
+
+    if ( buf[len - 1] )
+        return -EINVAL;
+
+    return 0;
+}
+
+static struct hypfs_entry *hypfs_get_entry_rel(struct hypfs_entry_dir *dir,
+                                               const char *path)
+{
+    const char *end;
+    struct hypfs_entry *entry;
+    unsigned int name_len;
+
+    if ( !*path )
+        return &dir->e;
+
+    if ( dir->e.type != XEN_HYPFS_TYPE_DIR )
+        return NULL;
+
+    end = strchr(path, '/');
+    if ( !end )
+        end = strchr(path, '\0');
+    name_len = end - path;
+
+    list_for_each_entry ( entry, &dir->dirlist, list )
+    {
+        int cmp = strncmp(path, entry->name, name_len);
+	struct hypfs_entry_dir *d = container_of(entry,
+                                                 struct hypfs_entry_dir, e);
+
+        if ( cmp < 0 )
+            return NULL;
+        if ( !cmp && strlen(entry->name) == name_len )
+            return *end ? hypfs_get_entry_rel(d, end + 1) : entry;
+    }
+
+    return NULL;
+}
+
+struct hypfs_entry *hypfs_get_entry(const char *path)
+{
+    if ( path[0] != '/' )
+        return NULL;
+
+    return hypfs_get_entry_rel(&hypfs_root, path + 1);
+}
+
+int hypfs_read_dir(const struct hypfs_entry *entry,
+                   XEN_GUEST_HANDLE_PARAM(void) uaddr)
+{
+    const struct hypfs_entry_dir *d;
+    struct hypfs_entry *e;
+    unsigned int size = entry->size;
+
+    d = container_of(entry, const struct hypfs_entry_dir, e);
+
+    list_for_each_entry ( e, &d->dirlist, list )
+    {
+        struct xen_hypfs_dirlistentry direntry;
+        unsigned int e_namelen = strlen(e->name) + 1;
+        unsigned int e_len = DIRENTRY_SIZE(e_namelen);
+
+        direntry.e.flags = e->write ? XEN_HYPFS_WRITEABLE : 0;
+        direntry.e.type = e->type;
+        direntry.e.encoding = e->encoding;
+        direntry.e.content_len = e->size;
+        direntry.off_next = list_is_last(&e->list, &d->dirlist) ? 0 : e_len;
+        if ( copy_to_guest(uaddr, &direntry, 1) )
+            return -EFAULT;
+
+        if ( copy_to_guest_offset(uaddr, DIRENTRY_NAME_OFF,
+                                  e->name, e_namelen) )
+            return -EFAULT;
+
+        guest_handle_add_offset(uaddr, e_len);
+
+        ASSERT(e_len <= size);
+        size -= e_len;
+    }
+
+    return 0;
+}
+
+int hypfs_read_leaf(const struct hypfs_entry *entry,
+                    XEN_GUEST_HANDLE_PARAM(void) uaddr)
+{
+    const struct hypfs_entry_leaf *l;
+
+    l = container_of(entry, const struct hypfs_entry_leaf, e);
+
+    return copy_to_guest(uaddr, l->content, entry->size) ? -EFAULT: 0;
+}
+
+static int hypfs_read(const struct hypfs_entry *entry,
+                      XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
+{
+    struct xen_hypfs_direntry e;
+    long ret = -EINVAL;
+
+    if ( ulen < sizeof(e) )
+        goto out;
+
+    e.flags = entry->write ? XEN_HYPFS_WRITEABLE : 0;
+    e.type = entry->type;
+    e.encoding = entry->encoding;
+    e.content_len = entry->size;
+
+    ret = -EFAULT;
+    if ( copy_to_guest(uaddr, &e, 1) )
+        goto out;
+
+    ret = 0;
+    if ( ulen < entry->size + sizeof(e) )
+        goto out;
+
+    guest_handle_add_offset(uaddr, sizeof(e));
+
+    ret = entry->read(entry, uaddr);
+
+ out:
+    return ret;
+}
+
+int hypfs_write_leaf(struct hypfs_entry_leaf *leaf,
+                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
+{
+    char *buf;
+    int ret;
+
+    if ( ulen > leaf->e.size )
+        ulen = leaf->e.size;
+
+    buf = xzalloc_array(char, ulen);
+    if ( !buf )
+        return -ENOMEM;
+
+    ret = -EFAULT;
+    if ( copy_from_guest(buf, uaddr, ulen) )
+        goto out;
+
+    ret = 0;
+    if ( leaf->e.type == XEN_HYPFS_TYPE_STRING )
+        buf[leaf->e.size - 1] = 0;
+    memcpy(leaf->write_ptr, buf, ulen);
+
+ out:
+    xfree(buf);
+    return ret;
+}
+
+int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
+                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
+{
+    union {
+        char buf[8];
+        uint8_t u8;
+        uint16_t u16;
+        uint32_t u32;
+        uint64_t u64;
+    } u;
+
+    ASSERT(leaf->e.type == XEN_HYPFS_TYPE_UINT && leaf->e.size <= 8);
+
+    if ( ulen != leaf->e.size )
+        return -EDOM;
+
+    if ( copy_from_guest(u.buf, uaddr, ulen) )
+        return -EFAULT;
+
+    switch ( leaf->e.size )
+    {
+    case 1:
+        *(uint8_t *)leaf->write_ptr = !!u.u8;
+        break;
+    case 2:
+        *(uint16_t *)leaf->write_ptr = !!u.u16;
+        break;
+    case 4:
+        *(uint32_t *)leaf->write_ptr = !!u.u32;
+        break;
+    case 8:
+        *(uint64_t *)leaf->write_ptr = !!u.u64;
+        break;
+    }
+
+    return 0;
+}
+
+static int hypfs_write(struct hypfs_entry *entry,
+                       XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen)
+{
+    struct hypfs_entry_leaf *l;
+
+    if ( !entry->write )
+        return -EACCES;
+
+    l = container_of(entry, struct hypfs_entry_leaf, e);
+
+    return entry->write(l, uaddr, ulen);
+}
+
+long do_hypfs_op(unsigned int cmd,
+                 XEN_GUEST_HANDLE_PARAM(void) arg1, unsigned long arg2,
+                 XEN_GUEST_HANDLE_PARAM(void) arg3, unsigned long arg4)
+{
+    int ret;
+    struct hypfs_entry *entry;
+    static char path[XEN_HYPFS_MAX_PATHLEN];
+
+    if ( !is_control_domain(current->domain) &&
+         !is_hardware_domain(current->domain) )
+        return -EPERM;
+
+    if ( cmd == XEN_HYPFS_OP_get_version )
+        return XEN_HYPFS_VERSION;
+
+    if ( cmd == XEN_HYPFS_OP_write_contents )
+        write_lock(&hypfs_lock);
+    else
+        read_lock(&hypfs_lock);
+
+    ret = hypfs_get_path_user(path, arg1, arg2);
+    if ( ret )
+        goto out;
+
+    entry = hypfs_get_entry(path);
+    if ( !entry )
+    {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    switch ( cmd )
+    {
+    case XEN_HYPFS_OP_read_contents:
+        ret = hypfs_read(entry, arg3, arg4);
+        break;
+
+    case XEN_HYPFS_OP_write_contents:
+        ret = hypfs_write(entry, arg3, arg4);
+        break;
+
+    default:
+        ret = -ENOSYS;
+        break;
+    }
+
+ out:
+    if ( cmd == XEN_HYPFS_OP_write_contents )
+        write_unlock(&hypfs_lock);
+    else
+        read_unlock(&hypfs_lock);
+
+    return ret;
+}
diff --git a/xen/include/public/hypfs.h b/xen/include/public/hypfs.h
new file mode 100644
index 0000000000..8351511227
--- /dev/null
+++ b/xen/include/public/hypfs.h
@@ -0,0 +1,124 @@ 
+/******************************************************************************
+ * Xen Hypervisor Filesystem
+ *
+ * Copyright (c) 2019, SUSE Software Solutions Germany GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_HYPFS_H__
+#define __XEN_PUBLIC_HYPFS_H__
+
+#include "xen.h"
+
+/*
+ * Definitions for the __HYPERVISOR_hypfs_op hypercall.
+ */
+
+/* Highest version number of the hypfs interface currently defined. */
+#define XEN_HYPFS_VERSION      1
+
+/* Maximum length of a path in the filesystem. */
+#define XEN_HYPFS_MAX_PATHLEN 1024
+
+struct xen_hypfs_direntry {
+    uint16_t flags;
+#define XEN_HYPFS_WRITEABLE    0x0001
+    uint8_t type;
+#define XEN_HYPFS_TYPE_DIR     0x0000
+#define XEN_HYPFS_TYPE_BLOB    0x0001
+#define XEN_HYPFS_TYPE_STRING  0x0002
+#define XEN_HYPFS_TYPE_UINT    0x0003
+#define XEN_HYPFS_TYPE_INT     0x0004
+#define XEN_HYPFS_TYPE_BOOL    0x0005
+    uint8_t encoding;
+#define XEN_HYPFS_ENC_PLAIN    0x0000
+#define XEN_HYPFS_ENC_GZIP     0x0001
+    uint32_t content_len;
+};
+
+struct xen_hypfs_dirlistentry {
+    struct xen_hypfs_direntry e;
+    /* Offset in bytes to next entry (0 == this is the last entry). */
+    uint16_t off_next;
+    char name[XEN_FLEX_ARRAY_DIM];
+};
+
+/*
+ * Hypercall operations.
+ */
+
+/*
+ * XEN_HYPFS_OP_get_version
+ *
+ * Read highest interface version supported by the hypervisor.
+ *
+ * Possible return values:
+ * >0: highest supported interface version
+ * <0: negative Xen errno value
+ */
+#define XEN_HYPFS_OP_get_version     0
+
+/*
+ * XEN_HYPFS_OP_read_contents
+ *
+ * Read contents of a filesystem entry.
+ *
+ * Returns the direntry and contents of an entry in the buffer supplied by the
+ * caller (struct xen_hypfs_direntry with the contents following directly
+ * after it).
+ * The data buffer must be at least the size of the direntry returned in order
+ * to have success. If the data buffer was not large enough for all the data
+ * no entry data is returned, but the direntry will contain the needed size
+ * for the returned data.
+ * The format of the contents is according to its entry type and encoding.
+ *
+ * arg1: XEN_GUEST_HANDLE(path name)
+ * arg2: length of path name (including trailing zero byte)
+ * arg3: XEN_GUEST_HANDLE(data buffer written by hypervisor)
+ * arg4: data buffer size
+ *
+ * Possible return values:
+ * 0: success (at least the direntry was returned)
+ * <0 : negative Xen errno value
+ */
+#define XEN_HYPFS_OP_read_contents     1
+
+/*
+ * XEN_HYPFS_OP_write_contents
+ *
+ * Write contents of a filesystem entry.
+ *
+ * Writes an entry with the contents of a buffer supplied by the caller.
+ * The data type and encoding can't be changed. The size can be changed only
+ * for blobs and strings.
+ *
+ * arg1: XEN_GUEST_HANDLE(path name)
+ * arg2: length of path name (including trailing zero byte)
+ * arg3: XEN_GUEST_HANDLE(content buffer read by hypervisor)
+ * arg4: content buffer size
+ *
+ * Possible return values:
+ * 0: success
+ * <0 : negative Xen errno value
+ */
+#define XEN_HYPFS_OP_write_contents    2
+
+#endif /* __XEN_PUBLIC_HYPFS_H__ */
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index d2198dffad..bf80f1da8c 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -130,6 +130,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
 #define __HYPERVISOR_argo_op              39
 #define __HYPERVISOR_xenpmu_op            40
 #define __HYPERVISOR_dm_op                41
+#define __HYPERVISOR_hypfs_op             42
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h
index ad8ad27b23..349a0f6487 100644
--- a/xen/include/xen/hypercall.h
+++ b/xen/include/xen/hypercall.h
@@ -150,6 +150,14 @@  do_dm_op(
     unsigned int nr_bufs,
     XEN_GUEST_HANDLE_PARAM(xen_dm_op_buf_t) bufs);
 
+extern long
+do_hypfs_op(
+    unsigned int cmd,
+    XEN_GUEST_HANDLE_PARAM(void) arg1,
+    unsigned long arg2,
+    XEN_GUEST_HANDLE_PARAM(void) arg3,
+    unsigned long arg4);
+
 #ifdef CONFIG_COMPAT
 
 extern int
diff --git a/xen/include/xen/hypfs.h b/xen/include/xen/hypfs.h
new file mode 100644
index 0000000000..a1c2140398
--- /dev/null
+++ b/xen/include/xen/hypfs.h
@@ -0,0 +1,89 @@ 
+#ifndef __XEN_HYPFS_H__
+#define __XEN_HYPFS_H__
+
+#include <xen/list.h>
+#include <xen/string.h>
+#include <public/hypfs.h>
+
+struct hypfs_entry_leaf;
+
+struct hypfs_entry {
+    unsigned short type;
+    unsigned short encoding;
+    unsigned int size;
+    const char *name;
+    struct list_head list;
+    int (*read)(const struct hypfs_entry *entry,
+                XEN_GUEST_HANDLE_PARAM(void) uaddr);
+    int (*write)(struct hypfs_entry_leaf *leaf,
+                 XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
+};
+
+struct hypfs_entry_leaf {
+    struct hypfs_entry e;
+    union {
+        const void *content;
+        void *write_ptr;
+    };
+};
+
+struct hypfs_entry_dir {
+    struct hypfs_entry e;
+    struct list_head dirlist;
+};
+
+#define HYPFS_DIR_INIT(var, nam)                \
+    struct hypfs_entry_dir var = {              \
+        .e.type = XEN_HYPFS_TYPE_DIR,           \
+        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
+        .e.name = nam,                          \
+        .e.size = 0,                            \
+        .e.list = LIST_HEAD_INIT(var.e.list),   \
+        .e.read = hypfs_read_dir,               \
+        .dirlist = LIST_HEAD_INIT(var.dirlist), \
+    }
+
+/* Content and size need to be set via hypfs_string_set(). */
+#define HYPFS_STRING_INIT(var, nam)             \
+    struct hypfs_entry_leaf var = {             \
+        .e.type = XEN_HYPFS_TYPE_STRING,        \
+        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
+        .e.name = nam,                          \
+        .e.read = hypfs_read_leaf,              \
+    }
+
+static inline void hypfs_string_set(struct hypfs_entry_leaf *leaf,
+                                    const char *str)
+{
+    leaf->content = str;
+    leaf->e.size = strlen(str) + 1;
+}
+
+#define HYPFS_UINT_INIT(var, nam, uint)         \
+    struct hypfs_entry_leaf var = {             \
+        .e.type = XEN_HYPFS_TYPE_UINT,          \
+        .e.encoding = XEN_HYPFS_ENC_PLAIN,      \
+        .e.name = nam,                          \
+        .e.size = sizeof(uint),                 \
+        .e.read = hypfs_read_leaf,              \
+        .content = &uint,                       \
+    }
+
+
+extern struct hypfs_entry_dir hypfs_root;
+
+struct hypfs_entry *hypfs_get_entry(const char *path);
+int hypfs_add_dir(struct hypfs_entry_dir *parent,
+                  struct hypfs_entry_dir *dir, bool nofault);
+int hypfs_add_leaf(struct hypfs_entry_dir *parent,
+                   struct hypfs_entry_leaf *leaf, bool nofault);
+int hypfs_read_dir(const struct hypfs_entry *entry,
+                   XEN_GUEST_HANDLE_PARAM(void) uaddr);
+int hypfs_read_leaf(const struct hypfs_entry *entry,
+                    XEN_GUEST_HANDLE_PARAM(void) uaddr);
+int hypfs_write_leaf(struct hypfs_entry_leaf *leaf,
+                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
+int hypfs_write_bool(struct hypfs_entry_leaf *leaf,
+                     XEN_GUEST_HANDLE_PARAM(void) uaddr, unsigned long ulen);
+
+#endif /* __XEN_HYPFS_H__ */