[v6,08/33] exec: allow memory to be allocated from any kind of path
diff mbox

Message ID 1446184587-142784-9-git-send-email-guangrong.xiao@linux.intel.com
State New
Headers show

Commit Message

Xiao Guangrong Oct. 30, 2015, 5:56 a.m. UTC
Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a file
located on a DAX-enabled filesystem.

So this patch lets it work on any kind of path

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 exec.c | 56 +++++++++++++++++---------------------------------------
 1 file changed, 17 insertions(+), 39 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Oct. 30, 2015, 2:04 p.m. UTC | #1
On 30.10.2015 08:56, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> locates at DAX enabled filesystem
>
> So this patch let it work on any kind of path
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>   exec.c | 56 +++++++++++++++++---------------------------------------
>   1 file changed, 17 insertions(+), 39 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 8af2570..3ca7e50 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1174,32 +1174,6 @@ void qemu_mutex_unlock_ramlist(void)
>   }
>   
>   #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -static long gethugepagesize(const char *path, Error **errp)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
> -                         path);
> -        return 0;
> -    }
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC)
> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> -
> -    return fs.f_bsize;
> -}
> -
>   static void *file_ram_alloc(RAMBlock *block,
>                               ram_addr_t memory,
>                               const char *path,
> @@ -1210,20 +1184,24 @@ static void *file_ram_alloc(RAMBlock *block,
>       char *c;
>       void *area;
>       int fd;
> -    uint64_t hpagesize;
> -    Error *local_err = NULL;
> +    uint64_t pagesize;
>   
> -    hpagesize = gethugepagesize(path, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> +    pagesize = qemu_file_get_page_size(path);
> +    if (!pagesize) {
> +        error_setg(errp, "can't get page size for %s", path);
>           goto error;
>       }
> -    block->mr->align = hpagesize;
>   
> -    if (memory < hpagesize) {
> +    if (pagesize == getpagesize()) {
> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
> +    }

It is strange to see this warning every time.

Shouldn't the differentiation be done explicitly in command line? May be 
separate option mem-tlb, or separate flag tlbfs=on, or for new feature - 
new option mem-file, or prefixes for paths (tlbfs://, file://).. Or the 
other way to not mix things but split them.

> +
> +    block->mr->align = pagesize;
> +
> +    if (memory < pagesize) {
>           error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
> -                   "or larger than huge page size 0x%" PRIx64,
> -                   memory, hpagesize);
> +                   "or larger than page size 0x%" PRIx64,
> +                   memory, pagesize);
>           goto error;
>       }
>   
> @@ -1247,14 +1225,14 @@ static void *file_ram_alloc(RAMBlock *block,
>       fd = mkstemp(filename);
>       if (fd < 0) {
>           error_setg_errno(errp, errno,
> -                         "unable to create backing store for hugepages");
> +                         "unable to create backing store for path %s", path);
>           g_free(filename);
>           goto error;
>       }
>       unlink(filename);
>       g_free(filename);
>   
> -    memory = ROUND_UP(memory, hpagesize);
> +    memory = ROUND_UP(memory, pagesize);
>   
>       /*
>        * ftruncate is not supported by hugetlbfs in older
> @@ -1266,10 +1244,10 @@ static void *file_ram_alloc(RAMBlock *block,
>           perror("ftruncate");
>       }
>   
> -    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
> +    area = qemu_ram_mmap(fd, memory, pagesize, block->flags & RAM_SHARED);
>       if (area == MAP_FAILED) {
>           error_setg_errno(errp, errno,
> -                         "unable to map backing store for hugepages");
> +                         "unable to map backing store for path %s", path);
>           close(fd);
>           goto error;
>       }
Xiao Guangrong Oct. 31, 2015, 7:44 a.m. UTC | #2
On 10/30/2015 10:04 PM, Vladimir Sementsov-Ogievskiy wrote:
> On 30.10.2015 08:56, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
>> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
>> locates at DAX enabled filesystem
>>
>> So this patch let it work on any kind of path
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>> ---
>>   exec.c | 56 +++++++++++++++++---------------------------------------
>>   1 file changed, 17 insertions(+), 39 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 8af2570..3ca7e50 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1174,32 +1174,6 @@ void qemu_mutex_unlock_ramlist(void)
>>   }
>>   #ifdef __linux__
>> -
>> -#include <sys/vfs.h>
>> -
>> -#define HUGETLBFS_MAGIC       0x958458f6
>> -
>> -static long gethugepagesize(const char *path, Error **errp)
>> -{
>> -    struct statfs fs;
>> -    int ret;
>> -
>> -    do {
>> -        ret = statfs(path, &fs);
>> -    } while (ret != 0 && errno == EINTR);
>> -
>> -    if (ret != 0) {
>> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
>> -                         path);
>> -        return 0;
>> -    }
>> -
>> -    if (fs.f_type != HUGETLBFS_MAGIC)
>> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
>> -
>> -    return fs.f_bsize;
>> -}
>> -
>>   static void *file_ram_alloc(RAMBlock *block,
>>                               ram_addr_t memory,
>>                               const char *path,
>> @@ -1210,20 +1184,24 @@ static void *file_ram_alloc(RAMBlock *block,
>>       char *c;
>>       void *area;
>>       int fd;
>> -    uint64_t hpagesize;
>> -    Error *local_err = NULL;
>> +    uint64_t pagesize;
>> -    hpagesize = gethugepagesize(path, &local_err);
>> -    if (local_err) {
>> -        error_propagate(errp, local_err);
>> +    pagesize = qemu_file_get_page_size(path);
>> +    if (!pagesize) {
>> +        error_setg(errp, "can't get page size for %s", path);
>>           goto error;
>>       }
>> -    block->mr->align = hpagesize;
>> -    if (memory < hpagesize) {
>> +    if (pagesize == getpagesize()) {
>> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
>> +    }
>
> It is strange to see this warning every time.
>
> Shouldn't the differentiation be done explicitly in command line? May be separate option mem-tlb, or
> separate flag tlbfs=on, or for new feature - new option mem-file, or prefixes for paths (tlbfs://,
> file://).. Or the other way to not mix things but split them.

This is just a reminder to users. Currently QEMU does not stop the user from using a regular file
for its backend memory; in particular, hugetlbfs is not the only way to use HugePages on
a THP-enabled system.

We can implement your idea as an independent patchset in the future.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost Oct. 31, 2015, 1:55 p.m. UTC | #3
On Sat, Oct 31, 2015 at 03:44:39PM +0800, Xiao Guangrong wrote:
> On 10/30/2015 10:04 PM, Vladimir Sementsov-Ogievskiy wrote:
> >On 30.10.2015 08:56, Xiao Guangrong wrote:
> >>Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> >>of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> >>locates at DAX enabled filesystem
> >>
> >>So this patch let it work on any kind of path
> >>
> >>Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
[...]
> >>-    block->mr->align = hpagesize;
> >>-    if (memory < hpagesize) {
> >>+    if (pagesize == getpagesize()) {
> >>+        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
> >>+    }
> >
> >It is strange to see this warning every time.
> >
> >Shouldn't the differentiation be done explicitly in command line? May be separate option mem-tlb, or
> >separate flag tlbfs=on, or for new feature - new option mem-file, or prefixes for paths (tlbfs://,
> >file://).. Or the other way to not mix things but split them.
> 
> This is just a reminder to users. Currently Qemu do not stop user to append a regular file
> for its backend memory, particularly, hugetlbfs is not the only way to use HugePage for
> the THP-enabled system.
> 
> We can implement your idea as a independent patchset in the future.

Isn't the whole point of this patch to make the code not specific to
hugetlbfs anymore? It makes the warning obsolete.
Xiao Guangrong Oct. 31, 2015, 3:56 p.m. UTC | #4
On 10/31/2015 09:55 PM, Eduardo Habkost wrote:
> On Sat, Oct 31, 2015 at 03:44:39PM +0800, Xiao Guangrong wrote:
>> On 10/30/2015 10:04 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> On 30.10.2015 08:56, Xiao Guangrong wrote:
>>>> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
>>>> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
>>>> locates at DAX enabled filesystem
>>>>
>>>> So this patch let it work on any kind of path
>>>>
>>>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> [...]
>>>> -    block->mr->align = hpagesize;
>>>> -    if (memory < hpagesize) {
>>>> +    if (pagesize == getpagesize()) {
>>>> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
>>>> +    }
>>>
>>> It is strange to see this warning every time.
>>>
>>> Shouldn't the differentiation be done explicitly in command line? May be separate option mem-tlb, or
>>> separate flag tlbfs=on, or for new feature - new option mem-file, or prefixes for paths (tlbfs://,
>>> file://).. Or the other way to not mix things but split them.
>>
>> This is just a reminder to users. Currently Qemu do not stop user to append a regular file
>> for its backend memory, particularly, hugetlbfs is not the only way to use HugePage for
>> the THP-enabled system.
>>
>> We can implement your idea as a independent patchset in the future.
>
> Isn't the whole point of this patch to make the code not specific to
> hugetlbfs anymore? It makes the warning obsolete.
>

Before this patchset, this warning could still be triggered; for example, we
can specify the @path as /tmp/

I agree this warning could be improved, but let us do it independently.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Nov. 9, 2015, 10:39 a.m. UTC | #5
On Fri, Oct 30, 2015 at 01:56:02PM +0800, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> locates at DAX enabled filesystem
> 
> So this patch let it work on any kind of path
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

So this allows regular memory to be specified directly.
This needs to be split out and merged separately
from acpi/nvdimm bits.

Alternatively, if it's possible to use nvdimm with DAX fs
(similar to hugetlbfs), leave these patches off for now.


> ---
>  exec.c | 56 +++++++++++++++++---------------------------------------
>  1 file changed, 17 insertions(+), 39 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 8af2570..3ca7e50 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1174,32 +1174,6 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -static long gethugepagesize(const char *path, Error **errp)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
> -                         path);
> -        return 0;
> -    }
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC)
> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> -
> -    return fs.f_bsize;
> -}
> -
>  static void *file_ram_alloc(RAMBlock *block,
>                              ram_addr_t memory,
>                              const char *path,
> @@ -1210,20 +1184,24 @@ static void *file_ram_alloc(RAMBlock *block,
>      char *c;
>      void *area;
>      int fd;
> -    uint64_t hpagesize;
> -    Error *local_err = NULL;
> +    uint64_t pagesize;
>  
> -    hpagesize = gethugepagesize(path, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> +    pagesize = qemu_file_get_page_size(path);
> +    if (!pagesize) {
> +        error_setg(errp, "can't get page size for %s", path);
>          goto error;
>      }
> -    block->mr->align = hpagesize;
>  
> -    if (memory < hpagesize) {
> +    if (pagesize == getpagesize()) {
> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
> +    }
> +
> +    block->mr->align = pagesize;
> +
> +    if (memory < pagesize) {
>          error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
> -                   "or larger than huge page size 0x%" PRIx64,
> -                   memory, hpagesize);
> +                   "or larger than page size 0x%" PRIx64,
> +                   memory, pagesize);
>          goto error;
>      }
>  
> @@ -1247,14 +1225,14 @@ static void *file_ram_alloc(RAMBlock *block,
>      fd = mkstemp(filename);
>      if (fd < 0) {
>          error_setg_errno(errp, errno,
> -                         "unable to create backing store for hugepages");
> +                         "unable to create backing store for path %s", path);
>          g_free(filename);
>          goto error;
>      }
>      unlink(filename);
>      g_free(filename);

Looks like we are still calling mkstemp/unlink here.
How does this work?

>  
> -    memory = ROUND_UP(memory, hpagesize);
> +    memory = ROUND_UP(memory, pagesize);
>  
>      /*
>       * ftruncate is not supported by hugetlbfs in older
> @@ -1266,10 +1244,10 @@ static void *file_ram_alloc(RAMBlock *block,
>          perror("ftruncate");
>      }
>  
> -    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
> +    area = qemu_ram_mmap(fd, memory, pagesize, block->flags & RAM_SHARED);
>      if (area == MAP_FAILED) {
>          error_setg_errno(errp, errno,
> -                         "unable to map backing store for hugepages");
> +                         "unable to map backing store for path %s", path);
>          close(fd);
>          goto error;
>      }
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Nov. 9, 2015, 11:01 a.m. UTC | #6
On 11/09/2015 06:39 PM, Michael S. Tsirkin wrote:
> On Fri, Oct 30, 2015 at 01:56:02PM +0800, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
>> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
>> locates at DAX enabled filesystem
>>
>> So this patch let it work on any kind of path
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>
> So this allows regular memory to be specified directly.
> This needs to be split out and merged separately
> from acpi/nvdimm bits.

Yup, that is in my plan.

>
> Alternatively, if it's possible to use nvdimm with DAX fs
> (similar to hugetlbfs), leave these patches off for now.
>

DAX is a filesystem mount option; it is not easy to get this
info from a file.

>
>> ---
>>   exec.c | 56 +++++++++++++++++---------------------------------------
>>   1 file changed, 17 insertions(+), 39 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 8af2570..3ca7e50 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1174,32 +1174,6 @@ void qemu_mutex_unlock_ramlist(void)
>>   }
>>
>>   #ifdef __linux__
>> -
>> -#include <sys/vfs.h>
>> -
>> -#define HUGETLBFS_MAGIC       0x958458f6
>> -
>> -static long gethugepagesize(const char *path, Error **errp)
>> -{
>> -    struct statfs fs;
>> -    int ret;
>> -
>> -    do {
>> -        ret = statfs(path, &fs);
>> -    } while (ret != 0 && errno == EINTR);
>> -
>> -    if (ret != 0) {
>> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
>> -                         path);
>> -        return 0;
>> -    }
>> -
>> -    if (fs.f_type != HUGETLBFS_MAGIC)
>> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
>> -
>> -    return fs.f_bsize;
>> -}
>> -
>>   static void *file_ram_alloc(RAMBlock *block,
>>                               ram_addr_t memory,
>>                               const char *path,
>> @@ -1210,20 +1184,24 @@ static void *file_ram_alloc(RAMBlock *block,
>>       char *c;
>>       void *area;
>>       int fd;
>> -    uint64_t hpagesize;
>> -    Error *local_err = NULL;
>> +    uint64_t pagesize;
>>
>> -    hpagesize = gethugepagesize(path, &local_err);
>> -    if (local_err) {
>> -        error_propagate(errp, local_err);
>> +    pagesize = qemu_file_get_page_size(path);
>> +    if (!pagesize) {
>> +        error_setg(errp, "can't get page size for %s", path);
>>           goto error;
>>       }
>> -    block->mr->align = hpagesize;
>>
>> -    if (memory < hpagesize) {
>> +    if (pagesize == getpagesize()) {
>> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
>> +    }
>> +
>> +    block->mr->align = pagesize;
>> +
>> +    if (memory < pagesize) {
>>           error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
>> -                   "or larger than huge page size 0x%" PRIx64,
>> -                   memory, hpagesize);
>> +                   "or larger than page size 0x%" PRIx64,
>> +                   memory, pagesize);
>>           goto error;
>>       }
>>
>> @@ -1247,14 +1225,14 @@ static void *file_ram_alloc(RAMBlock *block,
>>       fd = mkstemp(filename);
>>       if (fd < 0) {
>>           error_setg_errno(errp, errno,
>> -                         "unable to create backing store for hugepages");
>> +                         "unable to create backing store for path %s", path);
>>           g_free(filename);
>>           goto error;
>>       }
>>       unlink(filename);
>>       g_free(filename);
>
> Looks like we are still calling mkstemp/unlink here.
> How does this work?

Hmm? We have got the fd, so the file can be safely unlinked (the kernel does not actually
remove the file, since the fd returned by mkstemp() holds a reference).

And this patch just renames the variables; no logic is changed. I will drop it from the series
for now.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/exec.c b/exec.c
index 8af2570..3ca7e50 100644
--- a/exec.c
+++ b/exec.c
@@ -1174,32 +1174,6 @@  void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
-
-#include <sys/vfs.h>
-
-#define HUGETLBFS_MAGIC       0x958458f6
-
-static long gethugepagesize(const char *path, Error **errp)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(path, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        error_setg_errno(errp, errno, "failed to get page size of file %s",
-                         path);
-        return 0;
-    }
-
-    if (fs.f_type != HUGETLBFS_MAGIC)
-        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
-
-    return fs.f_bsize;
-}
-
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1210,20 +1184,24 @@  static void *file_ram_alloc(RAMBlock *block,
     char *c;
     void *area;
     int fd;
-    uint64_t hpagesize;
-    Error *local_err = NULL;
+    uint64_t pagesize;
 
-    hpagesize = gethugepagesize(path, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    pagesize = qemu_file_get_page_size(path);
+    if (!pagesize) {
+        error_setg(errp, "can't get page size for %s", path);
         goto error;
     }
-    block->mr->align = hpagesize;
 
-    if (memory < hpagesize) {
+    if (pagesize == getpagesize()) {
+        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
+    }
+
+    block->mr->align = pagesize;
+
+    if (memory < pagesize) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
-                   "or larger than huge page size 0x%" PRIx64,
-                   memory, hpagesize);
+                   "or larger than page size 0x%" PRIx64,
+                   memory, pagesize);
         goto error;
     }
 
@@ -1247,14 +1225,14 @@  static void *file_ram_alloc(RAMBlock *block,
     fd = mkstemp(filename);
     if (fd < 0) {
         error_setg_errno(errp, errno,
-                         "unable to create backing store for hugepages");
+                         "unable to create backing store for path %s", path);
         g_free(filename);
         goto error;
     }
     unlink(filename);
     g_free(filename);
 
-    memory = ROUND_UP(memory, hpagesize);
+    memory = ROUND_UP(memory, pagesize);
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1266,10 +1244,10 @@  static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
+    area = qemu_ram_mmap(fd, memory, pagesize, block->flags & RAM_SHARED);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to map backing store for hugepages");
+                         "unable to map backing store for path %s", path);
         close(fd);
         goto error;
     }