diff mbox

[v2,02/42] exec: Fix memory allocation when memory path isn't on hugetlbfs

Message ID 1457378754-21649-3-git-send-email-armbru@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Markus Armbruster March 7, 2016, 7:25 p.m. UTC
gethugepagesize() works reliably only when its argument is on
hugetlbfs.  When it's not, it returns the filesystem's "optimal
transfer block size", which may or may not be the actual page size
you'll get when you mmap().

If the value is too small or not a power of two, we fail
qemu_ram_mmap()'s assertions.  These were added in commit 794e8f3
(v2.5.0).  The bug's impact before that is currently unknown.  Seems
fairly unlikely at least when the normal page size is 4KiB.

Else, if the value is too large, we align more strictly than
necessary.

gethugepagesize() goes back to commit c902760 (v0.13).  That commit
clearly intended gethugepagesize() to be used on hugetlbfs only.  Not
only was it named accordingly, it also printed a warning when used on
anything else.  However, the commit neglected to spell out the
restriction in user documentation of -mem-path.

Commit bfc2a1a (v2.5.0) dropped the warning as bogus "because QEMU
functions perfectly well with the path on a regular tmpfs filesystem".
It sure does when you're sufficiently lucky.  In my testing, I was
lucky, too.

Fix by switching to qemu_fd_getpagesize().  Rename the variable
holding its result from hpagesize to page_size.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 exec.c | 40 +++++++---------------------------------
 1 file changed, 7 insertions(+), 33 deletions(-)

Comments

Paolo Bonzini March 8, 2016, 2:17 p.m. UTC | #1
On 07/03/2016 20:25, Markus Armbruster wrote:
> gethugepagesize() works reliably only when its argument is on
> hugetlbfs.  When it's not, it returns the filesystem's "optimal
> transfer block size", which may or may not be the actual page size
> you'll get when you mmap().
> 
> If the value is too small or not a power of two, we fail
> qemu_ram_mmap()'s assertions.  These were added in commit 794e8f3
> (v2.5.0).  The bug's impact before that is currently unknown.  Seems
> fairly unlikely at least when the normal page size is 4KiB.
> 
> Else, if the value is too large, we align more strictly than
> necessary.
> 
> gethugepagesize() goes back to commit c902760 (v0.13).  That commit
> clearly intended gethugepagesize() to be used on hugetlbfs only.  Not
> only was it named accordingly, it also printed a warning when used on
> anything else.  However, the commit neglected to spell out the
> restriction in user documentation of -mem-path.
> 
> Commit bfc2a1a (v2.5.0) dropped the warning as bogus "because QEMU
> functions perfectly well with the path on a regular tmpfs filesystem".
> It sure does when you're sufficiently lucky.  In my testing, I was
> lucky, too.
> 
> Fix by switching to qemu_fd_getpagesize().  Rename the variable
> holding its result from hpagesize to page_size.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  exec.c | 40 +++++++---------------------------------
>  1 file changed, 7 insertions(+), 33 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 5275ff4..d41194e 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1207,27 +1207,6 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -static long gethugepagesize(int fd)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = fstatfs(fd, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        return -1;
> -    }
> -
> -    return fs.f_bsize;
> -}
> -
>  static void *file_ram_alloc(RAMBlock *block,
>                              ram_addr_t memory,
>                              const char *path,
> @@ -1239,7 +1218,7 @@ static void *file_ram_alloc(RAMBlock *block,
>      char *c;
>      void *area;
>      int fd;
> -    int64_t hpagesize;
> +    int64_t page_size;
>  
>      if (kvm_enabled() && !kvm_has_sync_mmu()) {
>          error_setg(errp,
> @@ -1294,22 +1273,17 @@ static void *file_ram_alloc(RAMBlock *block,
>           */
>      }
>  
> -    hpagesize = gethugepagesize(fd);
> -    if (hpagesize < 0) {
> -        error_setg_errno(errp, errno, "can't get page size for %s",
> -                         path);
> -        goto error;
> -    }
> -    block->mr->align = hpagesize;
> +    page_size = qemu_fd_getpagesize(fd);
> +    block->mr->align = page_size;
>  
> -    if (memory < hpagesize) {
> +    if (memory < page_size) {
>          error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
>                     "or larger than page size 0x%" PRIx64,
> -                   memory, hpagesize);
> +                   memory, page_size);
>          goto error;
>      }
>  
> -    memory = ROUND_UP(memory, hpagesize);
> +    memory = ROUND_UP(memory, page_size);
>  
>      /*
>       * ftruncate is not supported by hugetlbfs in older
> @@ -1321,7 +1295,7 @@ static void *file_ram_alloc(RAMBlock *block,
>          perror("ftruncate");
>      }
>  
> -    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
> +    area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
>      if (area == MAP_FAILED) {
>          error_setg_errno(errp, errno,
>                           "unable to map backing store for guest RAM");
> 

Queued, thanks.

Paolo
Markus Armbruster March 15, 2016, 4:41 p.m. UTC | #2
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 07/03/2016 20:25, Markus Armbruster wrote:
>> gethugepagesize() works reliably only when its argument is on
>> hugetlbfs.  When it's not, it returns the filesystem's "optimal
>> transfer block size", which may or may not be the actual page size
>> you'll get when you mmap().
>> 
>> If the value is too small or not a power of two, we fail
>> qemu_ram_mmap()'s assertions.  These were added in commit 794e8f3
>> (v2.5.0).  The bug's impact before that is currently unknown.  Seems
>> fairly unlikely at least when the normal page size is 4KiB.
>> 
>> Else, if the value is too large, we align more strictly than
>> necessary.
>> 
>> gethugepagesize() goes back to commit c902760 (v0.13).  That commit
>> clearly intended gethugepagesize() to be used on hugetlbfs only.  Not
>> only was it named accordingly, it also printed a warning when used on
>> anything else.  However, the commit neglected to spell out the
>> restriction in user documentation of -mem-path.
>> 
>> Commit bfc2a1a (v2.5.0) dropped the warning as bogus "because QEMU
>> functions perfectly well with the path on a regular tmpfs filesystem".
>> It sure does when you're sufficiently lucky.  In my testing, I was
>> lucky, too.
>> 
>> Fix by switching to qemu_fd_getpagesize().  Rename the variable
>> holding its result from hpagesize to page_size.
>> 
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
[...]
>
> Queued, thanks.

Not in master, yet.  What repo+branch should I use as base for v3?
Paolo Bonzini March 15, 2016, 4:56 p.m. UTC | #3
On 15/03/2016 17:41, Markus Armbruster wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
>> On 07/03/2016 20:25, Markus Armbruster wrote:
>>> gethugepagesize() works reliably only when its argument is on
>>> hugetlbfs.  When it's not, it returns the filesystem's "optimal
>>> transfer block size", which may or may not be the actual page size
>>> you'll get when you mmap().
>>>
>>> If the value is too small or not a power of two, we fail
>>> qemu_ram_mmap()'s assertions.  These were added in commit 794e8f3
>>> (v2.5.0).  The bug's impact before that is currently unknown.  Seems
>>> fairly unlikely at least when the normal page size is 4KiB.
>>>
>>> Else, if the value is too large, we align more strictly than
>>> necessary.
>>>
>>> gethugepagesize() goes back to commit c902760 (v0.13).  That commit
>>> clearly intended gethugepagesize() to be used on hugetlbfs only.  Not
>>> only was it named accordingly, it also printed a warning when used on
>>> anything else.  However, the commit neglected to spell out the
>>> restriction in user documentation of -mem-path.
>>>
>>> Commit bfc2a1a (v2.5.0) dropped the warning as bogus "because QEMU
>>> functions perfectly well with the path on a regular tmpfs filesystem".
>>> It sure does when you're sufficiently lucky.  In my testing, I was
>>> lucky, too.
>>>
>>> Fix by switching to qemu_fd_getpagesize().  Rename the variable
>>> holding its result from hpagesize to page_size.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> [...]
>>
>> Queued, thanks.
> 
> Not in master, yet.  What repo+branch should I use as base for v3?

I'll send a pull request later today, keep an eye on it.

Paolo
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 5275ff4..d41194e 100644
--- a/exec.c
+++ b/exec.c
@@ -1207,27 +1207,6 @@  void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
-
-#include <sys/vfs.h>
-
-#define HUGETLBFS_MAGIC       0x958458f6
-
-static long gethugepagesize(int fd)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = fstatfs(fd, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        return -1;
-    }
-
-    return fs.f_bsize;
-}
-
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1239,7 +1218,7 @@  static void *file_ram_alloc(RAMBlock *block,
     char *c;
     void *area;
     int fd;
-    int64_t hpagesize;
+    int64_t page_size;
 
     if (kvm_enabled() && !kvm_has_sync_mmu()) {
         error_setg(errp,
@@ -1294,22 +1273,17 @@  static void *file_ram_alloc(RAMBlock *block,
          */
     }
 
-    hpagesize = gethugepagesize(fd);
-    if (hpagesize < 0) {
-        error_setg_errno(errp, errno, "can't get page size for %s",
-                         path);
-        goto error;
-    }
-    block->mr->align = hpagesize;
+    page_size = qemu_fd_getpagesize(fd);
+    block->mr->align = page_size;
 
-    if (memory < hpagesize) {
+    if (memory < page_size) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                    "or larger than page size 0x%" PRIx64,
-                   memory, hpagesize);
+                   memory, page_size);
         goto error;
     }
 
-    memory = ROUND_UP(memory, hpagesize);
+    memory = ROUND_UP(memory, page_size);
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1321,7 +1295,7 @@  static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
+    area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for guest RAM");