diff mbox series

[1/2] elf-ops.h: Map into memory the ELF to load

Message ID 20190723090718.14590-2-sgarzare@redhat.com (mailing list archive)
State New, archived
Headers show
Series pc: mmap kernel (ELF image) and initrd | expand

Commit Message

Stefano Garzarella July 23, 2019, 9:07 a.m. UTC
In order to reduce the memory footprint, we map the ELF to load into
memory using g_mapped_file_new_from_fd() instead of reading each
section. In this way we can share the ELF pages between multiple
instances of QEMU.

Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
 include/hw/elf_ops.h | 59 ++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

Comments

Peter Maydell July 23, 2019, 9:32 a.m. UTC | #1
On Tue, 23 Jul 2019 at 10:08, Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> In order to reduce the memory footprint we map into memory
> the ELF to load using g_mapped_file_new_from_fd() instead of
> reading each sections. In this way we can share the ELF pages
> between multiple instances of QEMU.
>
> Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  include/hw/elf_ops.h | 59 ++++++++++++++++++++++----------------------
>  1 file changed, 30 insertions(+), 29 deletions(-)
>
> diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
> index 690f9238c8..69ce8dea74 100644
> --- a/include/hw/elf_ops.h
> +++ b/include/hw/elf_ops.h
> @@ -323,8 +323,9 @@ static int glue(load_elf, SZ)(const char *name, int fd,
>      struct elfhdr ehdr;
>      struct elf_phdr *phdr = NULL, *ph;
>      int size, i, total_size;
> -    elf_word mem_size, file_size;
> +    elf_word mem_size, file_size, data_offset;
>      uint64_t addr, low = (uint64_t)-1, high = 0;
> +    GMappedFile *gmf = NULL;
>      uint8_t *data = NULL;
>      char label[128];
>      int ret = ELF_LOAD_FAILED;
> @@ -409,22 +410,26 @@ static int glue(load_elf, SZ)(const char *name, int fd,
>          }
>      }
>
> +    gmf = g_mapped_file_new_from_fd(fd, false, NULL);

Hmm. Here we pass 'false' for the writable argument,
meaning we promise not to modify the mapped buffer...

> +    if (!gmf) {
> +        goto fail;
> +    }
> +
>      total_size = 0;
>      for(i = 0; i < ehdr.e_phnum; i++) {
>          ph = &phdr[i];
>          if (ph->p_type == PT_LOAD) {
>              mem_size = ph->p_memsz; /* Size of the ROM */
>              file_size = ph->p_filesz; /* Size of the allocated data */
> -            data = g_malloc0(file_size);
> -            if (ph->p_filesz > 0) {
> -                if (lseek(fd, ph->p_offset, SEEK_SET) < 0) {
> -                    goto fail;
> -                }
> -                if (read(fd, data, file_size) != file_size) {
> -                    goto fail;
> -                }
> +            data_offset = ph->p_offset; /* Offset where the data is located */
> +
> +            if (g_mapped_file_get_length(gmf) < file_size + data_offset) {
> +                goto fail;
>              }
>
> +            data = (uint8_t *)g_mapped_file_get_contents(gmf);
> +            data += data_offset;

...but here we set up the 'data' pointer from the mapped contents,
and then in following code we will write to it in some situations --
look at the "if (data_swab)" case or the call to elf_reloc if we
have a translate_fn, for instance.

(We can't get out of this by just passing writable=true, because
we definitely don't want to be writing back to the underlying file.)

thanks
-- PMM
Stefano Garzarella July 23, 2019, 9:42 a.m. UTC | #2
On Tue, Jul 23, 2019 at 10:32:34AM +0100, Peter Maydell wrote:
> On Tue, 23 Jul 2019 at 10:08, Stefano Garzarella <sgarzare@redhat.com> wrote:
> >
> > In order to reduce the memory footprint we map into memory
> > the ELF to load using g_mapped_file_new_from_fd() instead of
> > reading each sections. In this way we can share the ELF pages
> > between multiple instances of QEMU.
> >
> > Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > ---
> >  include/hw/elf_ops.h | 59 ++++++++++++++++++++++----------------------
> >  1 file changed, 30 insertions(+), 29 deletions(-)
> >
> > diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
> > index 690f9238c8..69ce8dea74 100644
> > --- a/include/hw/elf_ops.h
> > +++ b/include/hw/elf_ops.h
> > @@ -323,8 +323,9 @@ static int glue(load_elf, SZ)(const char *name, int fd,
> >      struct elfhdr ehdr;
> >      struct elf_phdr *phdr = NULL, *ph;
> >      int size, i, total_size;
> > -    elf_word mem_size, file_size;
> > +    elf_word mem_size, file_size, data_offset;
> >      uint64_t addr, low = (uint64_t)-1, high = 0;
> > +    GMappedFile *gmf = NULL;
> >      uint8_t *data = NULL;
> >      char label[128];
> >      int ret = ELF_LOAD_FAILED;
> > @@ -409,22 +410,26 @@ static int glue(load_elf, SZ)(const char *name, int fd,
> >          }
> >      }
> >
> > +    gmf = g_mapped_file_new_from_fd(fd, false, NULL);
> 
> Hmm. Here we pass 'false' for the writable argument,
> meaning we promise not to modify the mapped buffer...
> 
> > +    if (!gmf) {
> > +        goto fail;
> > +    }
> > +
> >      total_size = 0;
> >      for(i = 0; i < ehdr.e_phnum; i++) {
> >          ph = &phdr[i];
> >          if (ph->p_type == PT_LOAD) {
> >              mem_size = ph->p_memsz; /* Size of the ROM */
> >              file_size = ph->p_filesz; /* Size of the allocated data */
> > -            data = g_malloc0(file_size);
> > -            if (ph->p_filesz > 0) {
> > -                if (lseek(fd, ph->p_offset, SEEK_SET) < 0) {
> > -                    goto fail;
> > -                }
> > -                if (read(fd, data, file_size) != file_size) {
> > -                    goto fail;
> > -                }
> > +            data_offset = ph->p_offset; /* Offset where the data is located */
> > +
> > +            if (g_mapped_file_get_length(gmf) < file_size + data_offset) {
> > +                goto fail;
> >              }
> >
> > +            data = (uint8_t *)g_mapped_file_get_contents(gmf);
> > +            data += data_offset;
> 
> ...but here we set up the 'data' pointer from the mapped contents,
> and then in following code we will write to it in some situations --
> look at the "if (data_swab)" case or the call to elf_reloc if we
> have a translate_fn, for instance.
> 

Reading the 'g_mapped_file_new_from_fd()' docs [1]:
"If writable is TRUE, the mapped buffer may be modified, otherwise it is an
error to modify the mapped buffer. Modifications to the buffer are not visible
to other processes mapping the same file, and are not written back to the file."

I don't know what "error" means, but reading the second part I thought
that, in that case, the changes were only visible to the current process.

I'll test it to better understand the behavior. If we can't touch it, then we
have to make a copy in these cases.

> (We can't get out of this by just passing writable=true, because
> we definitely don't want to be writing back to the underlying file.)

Yes, I agree.


Thanks,
Stefano

[1] https://developer.gnome.org/glib/stable/glib-File-Utilities.html#g-mapped-file-new-from-fd
Paolo Bonzini July 23, 2019, 9:49 a.m. UTC | #3
On 23/07/19 11:42, Stefano Garzarella wrote:
> "If writable is TRUE, the mapped buffer may be modified, otherwise it is an
> error to modify the mapped buffer. Modifications to the buffer are not visible
> to other processes mapping the same file, and are not written back to the file."
> 
> I don't know what "error" means, but reading the second part I thought
> the changes in that case were only visible at the current process.

My reading would be that the second part applies to the writable==TRUE
case.  In fact, the glib source code agrees:

      file->contents = (gchar *) mmap (NULL,  file->length,
                           writable ? PROT_READ|PROT_WRITE : PROT_READ,
                           MAP_PRIVATE, fd, 0);

meaning that we could after all just use writable == true.

Paolo

> I'll test it to understand better the behavior. If we can't touch it, then we
> have to make a copy in these cases.
> 
>> (We can't get out of this by just passing writable=true, because
>> we definitely don't want to be writing back to the underlying file.)
> Yes, I agree.
Peter Maydell July 23, 2019, 9:50 a.m. UTC | #4
On Tue, 23 Jul 2019 at 10:42, Stefano Garzarella <sgarzare@redhat.com> wrote:
> Reading the 'g_mapped_file_new_from_fd()' docs [1]:
> "If writable is TRUE, the mapped buffer may be modified, otherwise it is an
> error to modify the mapped buffer. Modifications to the buffer are not visible
> to other processes mapping the same file, and are not written back to the file."
>
> I don't know what "error" means, but reading the second part I thought
> the changes in that case were only visible at the current process.

Ah, I misread the docs here (and thought the following paragraph
which talks about changes to the underlying file becoming visible
to the mapping process was talking about changes in the mapping
process becoming visible to the file).

So I think the answer is that we do want to pass writable=true.

Looking at the implementation, we always use mmap()'s MAP_PRIVATE,
so we get a copy-on-write mapping that doesn't change the underlying
file. The effect of the 'writable' flag is that we use PROT_READ|PROT_WRITE,
so if we don't pass writable=true we're liable to get a segfault.

thanks
-- PMM
Stefano Garzarella July 23, 2019, 10:10 a.m. UTC | #5
On Tue, Jul 23, 2019 at 11:49:13AM +0200, Paolo Bonzini wrote:
> On 23/07/19 11:42, Stefano Garzarella wrote:
> > "If writable is TRUE, the mapped buffer may be modified, otherwise it is an
> > error to modify the mapped buffer. Modifications to the buffer are not visible
> > to other processes mapping the same file, and are not written back to the file."
> > 
> > I don't know what "error" means, but reading the second part I thought
> > the changes in that case were only visible at the current process.
> 
> My reading would be that the second part applies to the writable==TRUE
> case.  In fact, the glib source code agrees:
> 
>       file->contents = (gchar *) mmap (NULL,  file->length,
>                            writable ? PROT_READ|PROT_WRITE : PROT_READ,
>                            MAP_PRIVATE, fd, 0);
> 
> meaning that we could after all just use writable == true.

Thanks for checking! I'll use writable == true in v2!

Stefano
Stefano Garzarella July 23, 2019, 10:12 a.m. UTC | #6
On Tue, Jul 23, 2019 at 10:50:24AM +0100, Peter Maydell wrote:
> On Tue, 23 Jul 2019 at 10:42, Stefano Garzarella <sgarzare@redhat.com> wrote:
> > Reading the 'g_mapped_file_new_from_fd()' docs [1]:
> > "If writable is TRUE, the mapped buffer may be modified, otherwise it is an
> > error to modify the mapped buffer. Modifications to the buffer are not visible
> > to other processes mapping the same file, and are not written back to the file."
> >
> > I don't know what "error" means, but reading the second part I thought
> > the changes in that case were only visible at the current process.
> 
> Ah, I misread the docs here (and thought the following paragraph
> which talks about changes to the underlying file becoming visible
> to the mapping process was talking about changes in the mapping
> process becoming visible to the file).

I misread too...

> 
> So I think the answer is that we do want to pass writable=true.

Yes, I'll do that in v2!

> 
> Looking at the implementation, we always use mmap()'s MAP_PRIVATE,
> so we get a copy-on-write mapping that doesn't change the underlying
> file. The effect of the 'writable' flag is that we use PROT_READ|PROT_WRITE,
> so if we don't pass writable=true we're liable to get a segfault.

Yes, I just tried and I got the segfault.

Thanks,
Stefano
diff mbox series

Patch

diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index 690f9238c8..69ce8dea74 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -323,8 +323,9 @@  static int glue(load_elf, SZ)(const char *name, int fd,
     struct elfhdr ehdr;
     struct elf_phdr *phdr = NULL, *ph;
     int size, i, total_size;
-    elf_word mem_size, file_size;
+    elf_word mem_size, file_size, data_offset;
     uint64_t addr, low = (uint64_t)-1, high = 0;
+    GMappedFile *gmf = NULL;
     uint8_t *data = NULL;
     char label[128];
     int ret = ELF_LOAD_FAILED;
@@ -409,22 +410,26 @@  static int glue(load_elf, SZ)(const char *name, int fd,
         }
     }
 
+    gmf = g_mapped_file_new_from_fd(fd, false, NULL);
+    if (!gmf) {
+        goto fail;
+    }
+
     total_size = 0;
     for(i = 0; i < ehdr.e_phnum; i++) {
         ph = &phdr[i];
         if (ph->p_type == PT_LOAD) {
             mem_size = ph->p_memsz; /* Size of the ROM */
             file_size = ph->p_filesz; /* Size of the allocated data */
-            data = g_malloc0(file_size);
-            if (ph->p_filesz > 0) {
-                if (lseek(fd, ph->p_offset, SEEK_SET) < 0) {
-                    goto fail;
-                }
-                if (read(fd, data, file_size) != file_size) {
-                    goto fail;
-                }
+            data_offset = ph->p_offset; /* Offset where the data is located */
+
+            if (g_mapped_file_get_length(gmf) < file_size + data_offset) {
+                goto fail;
             }
 
+            data = (uint8_t *)g_mapped_file_get_contents(gmf);
+            data += data_offset;
+
             /* The ELF spec is somewhat vague about the purpose of the
              * physical address field. One common use in the embedded world
              * is that physical address field specifies the load address
@@ -513,17 +518,16 @@  static int glue(load_elf, SZ)(const char *name, int fd,
                 *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
             }
 
-            if (mem_size == 0) {
-                /* Some ELF files really do have segments of zero size;
-                 * just ignore them rather than trying to create empty
-                 * ROM blobs, because the zero-length blob can falsely
-                 * trigger the overlapping-ROM-blobs check.
-                 */
-                g_free(data);
-            } else {
+            /* Some ELF files really do have segments of zero size;
+             * just ignore them rather than trying to create empty
+             * ROM blobs, because the zero-length blob can falsely
+             * trigger the overlapping-ROM-blobs check.
+             */
+            if (mem_size != 0) {
                 if (load_rom) {
                     snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
-
+                    /* Increments the reference count to avoid the unmap */
+                    g_mapped_file_ref(gmf);
                     /* rom_add_elf_program() seize the ownership of 'data' */
                     rom_add_elf_program(label, data, file_size, mem_size,
                                         addr, as);
@@ -531,7 +535,6 @@  static int glue(load_elf, SZ)(const char *name, int fd,
                     address_space_write(as ? as : &address_space_memory,
                                         addr, MEMTXATTRS_UNSPECIFIED,
                                         data, file_size);
-                    g_free(data);
                 }
             }
 
@@ -547,16 +550,15 @@  static int glue(load_elf, SZ)(const char *name, int fd,
             struct elf_note *nhdr = NULL;
 
             file_size = ph->p_filesz; /* Size of the range of ELF notes */
-            data = g_malloc0(file_size);
-            if (ph->p_filesz > 0) {
-                if (lseek(fd, ph->p_offset, SEEK_SET) < 0) {
-                    goto fail;
-                }
-                if (read(fd, data, file_size) != file_size) {
-                    goto fail;
-                }
+            data_offset = ph->p_offset; /* Offset where the notes are located */
+
+            if (g_mapped_file_get_length(gmf) < file_size + data_offset) {
+                goto fail;
             }
 
+            data = (uint8_t *)g_mapped_file_get_contents(gmf);
+            data += data_offset;
+
             /*
              * Search the ELF notes to find one with a type matching the
              * value passed in via 'translate_opaque'
@@ -570,7 +572,6 @@  static int glue(load_elf, SZ)(const char *name, int fd,
                     sizeof(struct elf_note) == sizeof(struct elf64_note);
                 elf_note_fn((void *)nhdr, (void *)&ph->p_align, is64);
             }
-            g_free(data);
             data = NULL;
         }
     }
@@ -582,7 +583,7 @@  static int glue(load_elf, SZ)(const char *name, int fd,
         *highaddr = (uint64_t)(elf_sword)high;
     return total_size;
  fail:
-    g_free(data);
+    g_mapped_file_unref(gmf);
     g_free(phdr);
     return ret;
 }