diff mbox

[v4,11/14] xen/x86: parse Dom0 kernel for PVHv2

Message ID 20161130164950.43543-12-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roger Pau Monné Nov. 30, 2016, 4:49 p.m. UTC
Introduce a helper to parse the Dom0 kernel.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
Changes since v3:
 - Change one error message.
 - Indent "out" label by one space.
 - Introduce hvm_copy_to_phys and slightly simplify the code in hvm_load_kernel.

Changes since v2:
 - Remove debug messages.
 - Don't hardcode the number of modules to 1.
---
 xen/arch/x86/domain_build.c | 145 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

Comments

Jan Beulich Dec. 9, 2016, 5:05 p.m. UTC | #1
>>> On 30.11.16 at 17:49, <roger.pau@citrix.com> wrote:
> @@ -1930,12 +1931,148 @@ static int __init hvm_setup_p2m(struct domain *d)
>  #undef MB1_PAGES
>  }
>  
> +static int __init hvm_copy_to_phys(struct domain *d, paddr_t paddr, void *buf,
> +                                   int size)

I guess you made size plain int because hvm_copy_to_guest_phys()
has it that way, but please let's not spread such bogus things - sizes
can't possibly be negative.

> +{
> +    struct vcpu *saved_current;
> +    int rc;
> +
> +    saved_current = current;
> +    set_current(d->vcpu[0]);
> +    rc = hvm_copy_to_guest_phys(paddr, buf, size);
> +    set_current(saved_current);

I continue to be uncertain about the behavior of this if something
inside hvm_copy_to_guest_phys() goes wrong: Did you either
statically analyze the code or try out in practice whether
playing with current makes understanding the crash output any
harder?

While there's going to be some work involved with it, I do think
that the use here might be a reason for the whole hvm_copy()
machinery to gain a struct vcpu* parameter.

> +static int __init hvm_load_kernel(struct domain *d, const module_t *image,
> +                                  unsigned long image_headroom,
> +                                  module_t *initrd, char *image_base,
> +                                  char *cmdline, paddr_t *entry,
> +                                  paddr_t *start_info_addr)
> +{
> +    char *image_start = image_base + image_headroom;
> +    unsigned long image_len = image->mod_end;
> +    struct elf_binary elf;
> +    struct elf_dom_parms parms;
> +    paddr_t last_addr;
> +    struct hvm_start_info start_info;
> +    struct hvm_modlist_entry mod;
> +    struct vcpu *saved_current, *v = d->vcpu[0];
> +    int rc;
> +
> +    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
> +    {
> +        printk("Error trying to detect bz compressed kernel\n");
> +        return rc;
> +    }
> +
> +    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
> +    {
> +        printk("Unable to init ELF\n");
> +        return rc;
> +    }
> +#ifdef VERBOSE
> +    elf_set_verbose(&elf);
> +#endif
> +    elf_parse_binary(&elf);
> +    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
> +    {
> +        printk("Unable to parse kernel for ELFNOTES\n");
> +        return rc;
> +    }
> +
> +    if ( parms.phys_entry == UNSET_ADDR32 ) {
> +        printk("Unable to find XEN_ELFNOTE_PHYS32_ENTRY address\n");
> +        return -EINVAL;
> +    }
> +
> +    printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os,
> +           parms.guest_ver, parms.loader,
> +           elf_64bit(&elf) ? "64-bit" : "32-bit");
> +
> +    /* Copy the OS image and free temporary buffer. */
> +    elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base);
> +    elf.dest_size = parms.virt_kend - parms.virt_kstart;
> +
> +    saved_current = current;
> +    set_current(v);
> +    rc = elf_load_binary(&elf);
> +    set_current(saved_current);

Same reservations as above.

> +    if ( rc < 0 )
> +    {
> +        printk("Failed to load kernel: %d\n", rc);
> +        printk("Xen dom0 kernel broken ELF: %s\n", elf_check_broken(&elf));
> +        return rc;
> +    }
> +
> +    last_addr = ROUNDUP(parms.virt_kend - parms.virt_base, PAGE_SIZE);
> +
> +    if ( initrd != NULL )
> +    {
> +        rc = hvm_copy_to_phys(d, last_addr, mfn_to_virt(initrd->mod_start),
> +                              initrd->mod_end);
> +        if ( rc )
> +        {
> +            printk("Unable to copy initrd to guest\n");
> +            return rc;
> +        }
> +
> +        mod.paddr = last_addr;
> +        mod.size = initrd->mod_end;
> +        last_addr += ROUNDUP(initrd->mod_end, PAGE_SIZE);
> +    }

mod is left uninitialized in the else case afaict - I don't think all
compilers we support (plus Coverity) can spot the common
dependency on initrd != NULL.

Jan
Roger Pau Monné Dec. 20, 2016, 5:34 p.m. UTC | #2
On Fri, Dec 09, 2016 at 10:05:18AM -0700, Jan Beulich wrote:
> >>> On 30.11.16 at 17:49, <roger.pau@citrix.com> wrote:
> > @@ -1930,12 +1931,148 @@ static int __init hvm_setup_p2m(struct domain *d)
> >  #undef MB1_PAGES
> >  }
> >  
> > +static int __init hvm_copy_to_phys(struct domain *d, paddr_t paddr, void *buf,
> > +                                   int size)
> 
> I guess you made size plain int because hvm_copy_to_guest_phys()
> has it that way, but please let's not spread such bogus things - sizes
> can't possibly be negative.
> 
> > +{
> > +    struct vcpu *saved_current;
> > +    int rc;
> > +
> > +    saved_current = current;
> > +    set_current(d->vcpu[0]);
> > +    rc = hvm_copy_to_guest_phys(paddr, buf, size);
> > +    set_current(saved_current);
> 
> I continue to be uncertain about the behavior of this if something
> inside hvm_copy_to_guest_phys() goes wrong: Did you either
> statically analyze the code or try out in practice whether
> playing with current makes understanding the crash output any
> harder?

If you managed to somehow call hvm_copy_to_guest_phys with the idle vcpu as
current you would get this kind of error, which I admit is maybe not that
obvious (apart from the IDLEv0 prefix).

(XEN) IDLEv0 Error pfn 21bd: rd=32767 od=32756 caf=180000000000000 taf=0000000000000000

See below.

> While there's going to be some work involved with it, I do think
> that the use here might be a reason for the whole hvm_copy()
> machinery to gain a struct vcpu* parameter.

I've gone that route and added a new param to __hvm_copy, and also introduced
hvm_copy_to_guest_phys_vcpu which takes an additional vcpu parameter. While
there I've also added an assert to __hvm_copy in order to make sure the
vcpu parameter is always a hvm/pvh vcpu.

> > +static int __init hvm_load_kernel(struct domain *d, const module_t *image,
> > +                                  unsigned long image_headroom,
> > +                                  module_t *initrd, char *image_base,
> > +                                  char *cmdline, paddr_t *entry,
> > +                                  paddr_t *start_info_addr)
> > +{
> > +    char *image_start = image_base + image_headroom;
> > +    unsigned long image_len = image->mod_end;
> > +    struct elf_binary elf;
> > +    struct elf_dom_parms parms;
> > +    paddr_t last_addr;
> > +    struct hvm_start_info start_info;
> > +    struct hvm_modlist_entry mod;
> > +    struct vcpu *saved_current, *v = d->vcpu[0];
> > +    int rc;
> > +
> > +    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
> > +    {
> > +        printk("Error trying to detect bz compressed kernel\n");
> > +        return rc;
> > +    }
> > +
> > +    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
> > +    {
> > +        printk("Unable to init ELF\n");
> > +        return rc;
> > +    }
> > +#ifdef VERBOSE
> > +    elf_set_verbose(&elf);
> > +#endif
> > +    elf_parse_binary(&elf);
> > +    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
> > +    {
> > +        printk("Unable to parse kernel for ELFNOTES\n");
> > +        return rc;
> > +    }
> > +
> > +    if ( parms.phys_entry == UNSET_ADDR32 ) {
> > +        printk("Unable to find XEN_ELFNOTE_PHYS32_ENTRY address\n");
> > +        return -EINVAL;
> > +    }
> > +
> > +    printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os,
> > +           parms.guest_ver, parms.loader,
> > +           elf_64bit(&elf) ? "64-bit" : "32-bit");
> > +
> > +    /* Copy the OS image and free temporary buffer. */
> > +    elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base);
> > +    elf.dest_size = parms.virt_kend - parms.virt_kstart;
> > +
> > +    saved_current = current;
> > +    set_current(v);
> > +    rc = elf_load_binary(&elf);
> > +    set_current(saved_current);
> 
> Same reservations as above.

Right, this one however is more tricky to fix since elf_load_binary is shared
with libxc, so adding a vcpu/domain parameter here is problematic for the
toolstack side. That's quite similar to what happens on classic PV Dom0
creation, we need to switch to Dom0 page tables. I'm not trying to use that to
justify that this is the best way, but everything else seems quite convoluted
(either adding a new param to elf_load_binary or a new field to struct
elf_binary in order to store the domain/vcpu).

> > +    if ( rc < 0 )
> > +    {
> > +        printk("Failed to load kernel: %d\n", rc);
> > +        printk("Xen dom0 kernel broken ELF: %s\n", elf_check_broken(&elf));
> > +        return rc;
> > +    }
> > +
> > +    last_addr = ROUNDUP(parms.virt_kend - parms.virt_base, PAGE_SIZE);
> > +
> > +    if ( initrd != NULL )
> > +    {
> > +        rc = hvm_copy_to_phys(d, last_addr, mfn_to_virt(initrd->mod_start),
> > +                              initrd->mod_end);
> > +        if ( rc )
> > +        {
> > +            printk("Unable to copy initrd to guest\n");
> > +            return rc;
> > +        }
> > +
> > +        mod.paddr = last_addr;
> > +        mod.size = initrd->mod_end;
> > +        last_addr += ROUNDUP(initrd->mod_end, PAGE_SIZE);
> > +    }
> 
> mod is left uninitialized in the else case afaict - I don't think all
> compilers we support (plus Coverity) can spot the common
> dependency on initrd != NULL.

Clang doesn't seem to complain, but I will add an initializer to be sure.

Thanks, Roger.
diff mbox

Patch

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 8602566..e40fb94 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -39,6 +39,7 @@ 
 #include <asm/hpet.h>
 
 #include <public/version.h>
+#include <public/arch-x86/hvm/start_info.h>
 
 static long __initdata dom0_nrpages;
 static long __initdata dom0_min_nrpages;
@@ -1930,12 +1931,148 @@  static int __init hvm_setup_p2m(struct domain *d)
 #undef MB1_PAGES
 }
 
+static int __init hvm_copy_to_phys(struct domain *d, paddr_t paddr, void *buf,
+                                   int size)
+{
+    struct vcpu *saved_current;
+    int rc;
+
+    saved_current = current;
+    set_current(d->vcpu[0]);
+    rc = hvm_copy_to_guest_phys(paddr, buf, size);
+    set_current(saved_current);
+
+    return rc != HVMCOPY_okay ? -EFAULT : 0;
+}
+
+static int __init hvm_load_kernel(struct domain *d, const module_t *image,
+                                  unsigned long image_headroom,
+                                  module_t *initrd, char *image_base,
+                                  char *cmdline, paddr_t *entry,
+                                  paddr_t *start_info_addr)
+{
+    char *image_start = image_base + image_headroom;
+    unsigned long image_len = image->mod_end;
+    struct elf_binary elf;
+    struct elf_dom_parms parms;
+    paddr_t last_addr;
+    struct hvm_start_info start_info;
+    struct hvm_modlist_entry mod;
+    struct vcpu *saved_current, *v = d->vcpu[0];
+    int rc;
+
+    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
+    {
+        printk("Error trying to detect bz compressed kernel\n");
+        return rc;
+    }
+
+    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
+    {
+        printk("Unable to init ELF\n");
+        return rc;
+    }
+#ifdef VERBOSE
+    elf_set_verbose(&elf);
+#endif
+    elf_parse_binary(&elf);
+    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
+    {
+        printk("Unable to parse kernel for ELFNOTES\n");
+        return rc;
+    }
+
+    if ( parms.phys_entry == UNSET_ADDR32 ) {
+        printk("Unable to find XEN_ELFNOTE_PHYS32_ENTRY address\n");
+        return -EINVAL;
+    }
+
+    printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os,
+           parms.guest_ver, parms.loader,
+           elf_64bit(&elf) ? "64-bit" : "32-bit");
+
+    /* Copy the OS image and free temporary buffer. */
+    elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base);
+    elf.dest_size = parms.virt_kend - parms.virt_kstart;
+
+    saved_current = current;
+    set_current(v);
+    rc = elf_load_binary(&elf);
+    set_current(saved_current);
+    if ( rc < 0 )
+    {
+        printk("Failed to load kernel: %d\n", rc);
+        printk("Xen dom0 kernel broken ELF: %s\n", elf_check_broken(&elf));
+        return rc;
+    }
+
+    last_addr = ROUNDUP(parms.virt_kend - parms.virt_base, PAGE_SIZE);
+
+    if ( initrd != NULL )
+    {
+        rc = hvm_copy_to_phys(d, last_addr, mfn_to_virt(initrd->mod_start),
+                              initrd->mod_end);
+        if ( rc )
+        {
+            printk("Unable to copy initrd to guest\n");
+            return rc;
+        }
+
+        mod.paddr = last_addr;
+        mod.size = initrd->mod_end;
+        last_addr += ROUNDUP(initrd->mod_end, PAGE_SIZE);
+    }
+
+    /* Free temporary buffers. */
+    discard_initial_images();
+
+    memset(&start_info, 0, sizeof(start_info));
+    if ( cmdline != NULL )
+    {
+        rc = hvm_copy_to_phys(d, last_addr, cmdline, strlen(cmdline) + 1);
+        if ( rc )
+        {
+            printk("Unable to copy guest command line\n");
+            return rc;
+        }
+        start_info.cmdline_paddr = last_addr;
+        last_addr += ROUNDUP(strlen(cmdline) + 1, 8);
+    }
+    if ( initrd != NULL )
+    {
+        rc = hvm_copy_to_phys(d, last_addr, &mod, sizeof(mod));
+        if ( rc )
+        {
+            printk("Unable to copy guest modules\n");
+            return rc;
+        }
+        start_info.modlist_paddr = last_addr;
+        start_info.nr_modules = 1;
+        last_addr += sizeof(mod);
+    }
+
+    start_info.magic = XEN_HVM_START_MAGIC_VALUE;
+    start_info.flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+    rc = hvm_copy_to_phys(d, last_addr, &start_info, sizeof(start_info));
+    if ( rc )
+    {
+        printk("Unable to copy start info to guest\n");
+        return rc;
+    }
+
+    *entry = parms.phys_entry;
+    *start_info_addr = last_addr;
+
+    return 0;
+}
+
 static int __init construct_dom0_hvm(struct domain *d, const module_t *image,
                                      unsigned long image_headroom,
                                      module_t *initrd,
                                      void *(*bootstrap_map)(const module_t *),
                                      char *cmdline)
 {
+    paddr_t entry, start_info;
     int rc;
 
     printk("** Building a PVH Dom0 **\n");
@@ -1953,6 +2090,14 @@  static int __init construct_dom0_hvm(struct domain *d, const module_t *image,
         return rc;
     }
 
+    rc = hvm_load_kernel(d, image, image_headroom, initrd, bootstrap_map(image),
+                         cmdline, &entry, &start_info);
+    if ( rc )
+    {
+        printk("Failed to load Dom0 kernel\n");
+        return rc;
+    }
+
     return 0;
 }