diff mbox

[v3,08/32] exec: allow memory to be allocated from any kind of path

Message ID 1444535584-18220-9-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong Oct. 11, 2015, 3:52 a.m. UTC
Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
locates at DAX enabled filesystem

So this patch let it work on any kind of path

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 exec.c | 55 ++++++++++++++-----------------------------------------
 1 file changed, 14 insertions(+), 41 deletions(-)

Comments

Michael S. Tsirkin Oct. 12, 2015, 10:08 a.m. UTC | #1
On Sun, Oct 11, 2015 at 11:52:40AM +0800, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> locates at DAX enabled filesystem
> 
> So this patch let it work on any kind of path
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

This conflicts with map alloc rework.
Please rebase this on top of my tree.


> ---
>  exec.c | 55 ++++++++++++++-----------------------------------------
>  1 file changed, 14 insertions(+), 41 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 7d90a52..70cb0ef 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1154,32 +1154,6 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -static long gethugepagesize(const char *path, Error **errp)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
> -                         path);
> -        return 0;
> -    }
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC)
> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> -
> -    return fs.f_bsize;

What this *actually* is trying to warn against is that
mapping a regular file (as opposed to hugetlbfs)
means transparent huge pages don't work.

So I don't think we should drop this warning completely.
Either let's add the nvdimm magic, or simply check the
page size.


> -}
> -
>  static void *file_ram_alloc(RAMBlock *block,
>                              ram_addr_t memory,
>                              const char *path,
> @@ -1191,22 +1165,21 @@ static void *file_ram_alloc(RAMBlock *block,
>      void *ptr;
>      void *area = NULL;
>      int fd;
> -    uint64_t hpagesize;
> +    uint64_t pagesize;
>      uint64_t total;
> -    Error *local_err = NULL;
>      size_t offset;
>  
> -    hpagesize = gethugepagesize(path, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> +    pagesize = qemu_file_get_page_size(path);
> +    if (!pagesize) {
> +        error_setg(errp, "can't get page size for %s", path);
>          goto error;
>      }
> -    block->mr->align = hpagesize;
> +    block->mr->align = pagesize;
>  
> -    if (memory < hpagesize) {
> +    if (memory < pagesize) {
>          error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
> -                   "or larger than huge page size 0x%" PRIx64,
> -                   memory, hpagesize);
> +                   "or larger than page size 0x%" PRIx64,
> +                   memory, pagesize);
>          goto error;
>      }
>  
> @@ -1230,15 +1203,15 @@ static void *file_ram_alloc(RAMBlock *block,
>      fd = mkstemp(filename);
>      if (fd < 0) {
>          error_setg_errno(errp, errno,
> -                         "unable to create backing store for hugepages");
> +                         "unable to create backing store for path %s", path);
>          g_free(filename);
>          goto error;
>      }
>      unlink(filename);
>      g_free(filename);
>  
> -    memory = ROUND_UP(memory, hpagesize);
> -    total = memory + hpagesize;
> +    memory = ROUND_UP(memory, pagesize);
> +    total = memory + pagesize;
>  
>      /*
>       * ftruncate is not supported by hugetlbfs in older
> @@ -1254,12 +1227,12 @@ static void *file_ram_alloc(RAMBlock *block,
>                  -1, 0);
>      if (ptr == MAP_FAILED) {
>          error_setg_errno(errp, errno,
> -                         "unable to allocate memory range for hugepages");
> +                         "unable to allocate memory range for path %s", path);
>          close(fd);
>          goto error;
>      }
>  
> -    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> +    offset = QEMU_ALIGN_UP((uintptr_t)ptr, pagesize) - (uintptr_t)ptr;
>  
>      area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
>                  (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> @@ -1267,7 +1240,7 @@ static void *file_ram_alloc(RAMBlock *block,
>                  fd, 0);
>      if (area == MAP_FAILED) {
>          error_setg_errno(errp, errno,
> -                         "unable to map backing store for hugepages");
> +                         "unable to map backing store for path %s", path);
>          munmap(ptr, total);
>          close(fd);
>          goto error;
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Oct. 13, 2015, 3:31 a.m. UTC | #2
On 10/12/2015 06:08 PM, Michael S. Tsirkin wrote:
> On Sun, Oct 11, 2015 at 11:52:40AM +0800, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
>> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
>> locates at DAX enabled filesystem
>>
>> So this patch let it work on any kind of path
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>
> This conflicts with map alloc rework.
> Please rebase this on top of my tree.
>

Okay, thanks for your reminder. I did it based on upstream QEMU tree,
will do it on pci branch on your tree instead.

>
>> ---
>>   exec.c | 55 ++++++++++++++-----------------------------------------
>>   1 file changed, 14 insertions(+), 41 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 7d90a52..70cb0ef 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1154,32 +1154,6 @@ void qemu_mutex_unlock_ramlist(void)
>>   }
>>
>>   #ifdef __linux__
>> -
>> -#include <sys/vfs.h>
>> -
>> -#define HUGETLBFS_MAGIC       0x958458f6
>> -
>> -static long gethugepagesize(const char *path, Error **errp)
>> -{
>> -    struct statfs fs;
>> -    int ret;
>> -
>> -    do {
>> -        ret = statfs(path, &fs);
>> -    } while (ret != 0 && errno == EINTR);
>> -
>> -    if (ret != 0) {
>> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
>> -                         path);
>> -        return 0;
>> -    }
>> -
>> -    if (fs.f_type != HUGETLBFS_MAGIC)
>> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
>> -
>> -    return fs.f_bsize;
>
> What this *actually* is trying to warn against is that
> mapping a regular file (as opposed to hugetlbfs)
> means transparent huge pages don't work.
>
> So I don't think we should drop this warning completely.
> Either let's add the nvdimm magic, or simply check the
> page size.

Check the page size sounds good, will check:
if (pagesize != getpagesize()) {
	...print something...
}

I agree with you that showing the info is needed, however,
'Warning' might scare some users, how about drop this word or
just show?“Memory is not allocated from HugeTlbfs”?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 7d90a52..70cb0ef 100644
--- a/exec.c
+++ b/exec.c
@@ -1154,32 +1154,6 @@  void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
-
-#include <sys/vfs.h>
-
-#define HUGETLBFS_MAGIC       0x958458f6
-
-static long gethugepagesize(const char *path, Error **errp)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(path, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        error_setg_errno(errp, errno, "failed to get page size of file %s",
-                         path);
-        return 0;
-    }
-
-    if (fs.f_type != HUGETLBFS_MAGIC)
-        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
-
-    return fs.f_bsize;
-}
-
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1191,22 +1165,21 @@  static void *file_ram_alloc(RAMBlock *block,
     void *ptr;
     void *area = NULL;
     int fd;
-    uint64_t hpagesize;
+    uint64_t pagesize;
     uint64_t total;
-    Error *local_err = NULL;
     size_t offset;
 
-    hpagesize = gethugepagesize(path, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    pagesize = qemu_file_get_page_size(path);
+    if (!pagesize) {
+        error_setg(errp, "can't get page size for %s", path);
         goto error;
     }
-    block->mr->align = hpagesize;
+    block->mr->align = pagesize;
 
-    if (memory < hpagesize) {
+    if (memory < pagesize) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
-                   "or larger than huge page size 0x%" PRIx64,
-                   memory, hpagesize);
+                   "or larger than page size 0x%" PRIx64,
+                   memory, pagesize);
         goto error;
     }
 
@@ -1230,15 +1203,15 @@  static void *file_ram_alloc(RAMBlock *block,
     fd = mkstemp(filename);
     if (fd < 0) {
         error_setg_errno(errp, errno,
-                         "unable to create backing store for hugepages");
+                         "unable to create backing store for path %s", path);
         g_free(filename);
         goto error;
     }
     unlink(filename);
     g_free(filename);
 
-    memory = ROUND_UP(memory, hpagesize);
-    total = memory + hpagesize;
+    memory = ROUND_UP(memory, pagesize);
+    total = memory + pagesize;
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1254,12 +1227,12 @@  static void *file_ram_alloc(RAMBlock *block,
                 -1, 0);
     if (ptr == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to allocate memory range for hugepages");
+                         "unable to allocate memory range for path %s", path);
         close(fd);
         goto error;
     }
 
-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
+    offset = QEMU_ALIGN_UP((uintptr_t)ptr, pagesize) - (uintptr_t)ptr;
 
     area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
                 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
@@ -1267,7 +1240,7 @@  static void *file_ram_alloc(RAMBlock *block,
                 fd, 0);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to map backing store for hugepages");
+                         "unable to map backing store for path %s", path);
         munmap(ptr, total);
         close(fd);
         goto error;