[v7,07/35] util: introduce qemu_file_get_page_size()
diff mbox

Message ID 1446455617-129562-8-git-send-email-guangrong.xiao@linux.intel.com
State New
Headers show

Commit Message

Xiao Guangrong Nov. 2, 2015, 9:13 a.m. UTC
There are three places use the some logic to get the page size on
the file path or file fd

Windows did not support file hugepage, so it will return normal page
for this case. And this interface has not been used on windows so far
 
This patch introduces qemu_file_get_page_size() to unify the code

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 exec.c               | 33 ++++++---------------------------
 include/qemu/osdep.h |  1 +
 target-ppc/kvm.c     | 23 +++++------------------
 util/oslib-posix.c   | 37 +++++++++++++++++++++++++++++++++----
 util/oslib-win32.c   |  5 +++++
 5 files changed, 50 insertions(+), 49 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Nov. 2, 2015, 1:56 p.m. UTC | #1
On 02.11.2015 12:13, Xiao Guangrong wrote:
> There are three places use the some logic to get the page size on
> the file path or file fd
>
> Windows did not support file hugepage, so it will return normal page
> for this case. And this interface has not been used on windows so far
>   
> This patch introduces qemu_file_get_page_size() to unify the code
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>   exec.c               | 33 ++++++---------------------------
>   include/qemu/osdep.h |  1 +
>   target-ppc/kvm.c     | 23 +++++------------------
>   util/oslib-posix.c   | 37 +++++++++++++++++++++++++++++++++----
>   util/oslib-win32.c   |  5 +++++
>   5 files changed, 50 insertions(+), 49 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 8af2570..9de38be 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1174,32 +1174,6 @@ void qemu_mutex_unlock_ramlist(void)
>   }
>   
>   #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -static long gethugepagesize(const char *path, Error **errp)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
> -                         path);
> -        return 0;
> -    }
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC)
> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> -
> -    return fs.f_bsize;
> -}
> -
>   static void *file_ram_alloc(RAMBlock *block,
>                               ram_addr_t memory,
>                               const char *path,
> @@ -1213,11 +1187,16 @@ static void *file_ram_alloc(RAMBlock *block,
>       uint64_t hpagesize;
>       Error *local_err = NULL;
>   
> -    hpagesize = gethugepagesize(path, &local_err);
> +    hpagesize = qemu_file_get_page_size(path, &local_err);
>       if (local_err) {
>           error_propagate(errp, local_err);
>           goto error;
>       }
> +
> +    if (hpagesize == getpagesize()) {
> +        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> +    }
> +
>       block->mr->align = hpagesize;
>   
>       if (memory < hpagesize) {
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index b568424..dbc17dc 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -302,4 +302,5 @@ int qemu_read_password(char *buf, int buf_size);
>    */
>   pid_t qemu_fork(Error **errp);
>   
> +size_t qemu_file_get_page_size(const char *mem_path, Error **errp);
>   #endif
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index ac70f08..d8760ea 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -308,28 +308,15 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
>   
>   static long gethugepagesize(const char *mem_path)
>   {
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(mem_path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> +    Error *local_err = NULL;
> +    long size = qemu_file_get_page_size(mem_path, local_err);
>   
> -    if (ret != 0) {
> -        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
> -                strerror(errno));
> +    if (local_err) {
> +        error_report_err(local_err);
>           exit(1);
>       }
>   
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC) {
> -        /* Explicit mempath, but it's ordinary pages */
> -        return getpagesize();
> -    }
> -
> -    /* It's hugepage, return the huge page size */
> -    return fs.f_bsize;
> +    return size;
>   }
>   
>   static int find_max_supported_pagesize(Object *obj, void *opaque)
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 914cef5..51437ff 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -340,7 +340,7 @@ static void sigbus_handler(int signal)
>       siglongjmp(sigjump, 1);
>   }
>   
> -static size_t fd_getpagesize(int fd)
> +static size_t fd_getpagesize(int fd, Error **errp)
>   {
>   #ifdef CONFIG_LINUX
>       struct statfs fs;
> @@ -351,7 +351,12 @@ static size_t fd_getpagesize(int fd)
>               ret = fstatfs(fd, &fs);
>           } while (ret != 0 && errno == EINTR);
>   
> -        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
> +        if (ret) {
> +            error_setg_errno(errp, errno, "fstatfs is failed");
> +            return 0;
> +        }
> +
> +        if (fs.f_type == HUGETLBFS_MAGIC) {
>               return fs.f_bsize;
>           }
>       }
> @@ -360,6 +365,22 @@ static size_t fd_getpagesize(int fd)
>       return getpagesize();
>   }
>   
> +size_t qemu_file_get_page_size(const char *path, Error **errp)
> +{
> +    size_t size = 0;
> +    int fd = qemu_open(path, O_RDONLY);
> +
> +    if (fd < 0) {
> +        error_setg_file_open(errp, errno, path);
> +        goto exit;
> +    }
> +
> +    size = fd_getpagesize(fd, errp);
> +    qemu_close(fd);
> +exit:
> +    return size;
> +}
> +
>   void os_mem_prealloc(int fd, char *area, size_t memory)
>   {
>       int ret;
> @@ -387,8 +408,16 @@ void os_mem_prealloc(int fd, char *area, size_t memory)
>           exit(1);
>       } else {
>           int i;
> -        size_t hpagesize = fd_getpagesize(fd);
> -        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
> +        Error *local_err = NULL;
> +        size_t hpagesize = fd_getpagesize(fd, &local_err);
> +        size_t numpages;
> +
> +        if (local_err) {
> +            error_report_err(local_err);
> +            exit(1);
> +        }
> +
> +        numpages = DIV_ROUND_UP(memory, hpagesize);
>   
>           /* MAP_POPULATE silently ignores failures */
>           for (i = 0; i < numpages; i++) {
> diff --git a/util/oslib-win32.c b/util/oslib-win32.c
> index 09f9e98..dada6b6 100644
> --- a/util/oslib-win32.c
> +++ b/util/oslib-win32.c
> @@ -462,6 +462,11 @@ size_t getpagesize(void)
>       return system_info.dwPageSize;
>   }
>   
> +size_t qemu_file_get_page_size(const char *path, Error **errp)
> +{
> +    return getpagesize();
> +}
> +
>   void os_mem_prealloc(int fd, char *area, size_t memory)
>   {
>       int i;

Ok for me. The only thing: some functions about pagesize return size_t, 
when others return long. long is more common practice here, but this 
doesn't really matter, so with or without size_t <-> long changes:

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Eduardo Habkost Nov. 6, 2015, 3:36 p.m. UTC | #2
On Mon, Nov 02, 2015 at 05:13:09PM +0800, Xiao Guangrong wrote:
> There are three places use the some logic to get the page size on
> the file path or file fd
> 
> Windows did not support file hugepage, so it will return normal page
> for this case. And this interface has not been used on windows so far
>  
> This patch introduces qemu_file_get_page_size() to unify the code
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
[...]
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 914cef5..51437ff 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -340,7 +340,7 @@ static void sigbus_handler(int signal)
>      siglongjmp(sigjump, 1);
>  }
>  
> -static size_t fd_getpagesize(int fd)
> +static size_t fd_getpagesize(int fd, Error **errp)
>  {
>  #ifdef CONFIG_LINUX
>      struct statfs fs;
> @@ -351,7 +351,12 @@ static size_t fd_getpagesize(int fd)
>              ret = fstatfs(fd, &fs);
>          } while (ret != 0 && errno == EINTR);
>  
> -        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
> +        if (ret) {
> +            error_setg_errno(errp, errno, "fstatfs is failed");
> +            return 0;
> +        }
> +
> +        if (fs.f_type == HUGETLBFS_MAGIC) {
>              return fs.f_bsize;
>          }

You are changing os_mem_prealloc() behavior when fstatfs() fails, here.
Have you ensured there are no cases where fstatfs() fails but this code
is still expected to work?

The change looks safe: gethugepagesize() seems to be always called in
the path that would make fd_getpagesize() be called from
os_mem_prealloc(), so allocation would abort much earlier if statfs()
failed. But I haven't confirmed that yet, and I wanted to be sure.
Xiao Guangrong Nov. 9, 2015, 4:36 a.m. UTC | #3
On 11/06/2015 11:36 PM, Eduardo Habkost wrote:
> On Mon, Nov 02, 2015 at 05:13:09PM +0800, Xiao Guangrong wrote:
>> There are three places use the some logic to get the page size on
>> the file path or file fd
>>
>> Windows did not support file hugepage, so it will return normal page
>> for this case. And this interface has not been used on windows so far
>>
>> This patch introduces qemu_file_get_page_size() to unify the code
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> [...]
>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>> index 914cef5..51437ff 100644
>> --- a/util/oslib-posix.c
>> +++ b/util/oslib-posix.c
>> @@ -340,7 +340,7 @@ static void sigbus_handler(int signal)
>>       siglongjmp(sigjump, 1);
>>   }
>>
>> -static size_t fd_getpagesize(int fd)
>> +static size_t fd_getpagesize(int fd, Error **errp)
>>   {
>>   #ifdef CONFIG_LINUX
>>       struct statfs fs;
>> @@ -351,7 +351,12 @@ static size_t fd_getpagesize(int fd)
>>               ret = fstatfs(fd, &fs);
>>           } while (ret != 0 && errno == EINTR);
>>
>> -        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
>> +        if (ret) {
>> +            error_setg_errno(errp, errno, "fstatfs is failed");
>> +            return 0;
>> +        }
>> +
>> +        if (fs.f_type == HUGETLBFS_MAGIC) {
>>               return fs.f_bsize;
>>           }
>
> You are changing os_mem_prealloc() behavior when fstatfs() fails, here.
> Have you ensured there are no cases where fstatfs() fails but this code
> is still expected to work?

stat() is supported for all kinds of files, so failed stat() is caused by
file is not exist or kernel internal error (e,g memory is not enough) or
security check is not passed. Whichever we should not do any operation on
the file if stat() failed. The origin code did not check it but it is worth
being fixed i think.

>
> The change looks safe: gethugepagesize() seems to be always called in
> the path that would make fd_getpagesize() be called from
> os_mem_prealloc(), so allocation would abort much earlier if statfs()
> failed. But I haven't confirmed that yet, and I wanted to be sure.
>

Yes, I am entirely agree with you. :)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost Nov. 9, 2015, 6:34 p.m. UTC | #4
On Mon, Nov 09, 2015 at 12:36:36PM +0800, Xiao Guangrong wrote:
> On 11/06/2015 11:36 PM, Eduardo Habkost wrote:
> >On Mon, Nov 02, 2015 at 05:13:09PM +0800, Xiao Guangrong wrote:
> >>There are three places use the some logic to get the page size on
> >>the file path or file fd
> >>
> >>Windows did not support file hugepage, so it will return normal page
> >>for this case. And this interface has not been used on windows so far
> >>
> >>This patch introduces qemu_file_get_page_size() to unify the code
> >>
> >>Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> >[...]
> >>diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> >>index 914cef5..51437ff 100644
> >>--- a/util/oslib-posix.c
> >>+++ b/util/oslib-posix.c
> >>@@ -340,7 +340,7 @@ static void sigbus_handler(int signal)
> >>      siglongjmp(sigjump, 1);
> >>  }
> >>
> >>-static size_t fd_getpagesize(int fd)
> >>+static size_t fd_getpagesize(int fd, Error **errp)
> >>  {
> >>  #ifdef CONFIG_LINUX
> >>      struct statfs fs;
> >>@@ -351,7 +351,12 @@ static size_t fd_getpagesize(int fd)
> >>              ret = fstatfs(fd, &fs);
> >>          } while (ret != 0 && errno == EINTR);
> >>
> >>-        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
> >>+        if (ret) {
> >>+            error_setg_errno(errp, errno, "fstatfs is failed");
> >>+            return 0;
> >>+        }
> >>+
> >>+        if (fs.f_type == HUGETLBFS_MAGIC) {
> >>              return fs.f_bsize;
> >>          }
> >
> >You are changing os_mem_prealloc() behavior when fstatfs() fails, here.
> >Have you ensured there are no cases where fstatfs() fails but this code
> >is still expected to work?
> 
> stat() is supported for all kinds of files, so failed stat() is caused by
> file is not exist or kernel internal error (e,g memory is not enough) or
> security check is not passed. Whichever we should not do any operation on
> the file if stat() failed. The origin code did not check it but it is worth
> being fixed i think.

Note that this is fstatfs(), not stat(). It's possible go get
ENOSYS as error from statfs() if it is not implemented by the
filesystem, I just don't know if this really can happen in
practice.

(But the answer won't matter, as we already aborted on statfs()
errors on all codepaths that call fd_getpagesize(). See below.)

> 
> >
> >The change looks safe: gethugepagesize() seems to be always called in
> >the path that would make fd_getpagesize() be called from
> >os_mem_prealloc(), so allocation would abort much earlier if statfs()
> >failed. But I haven't confirmed that yet, and I wanted to be sure.
> >
> 
> Yes, I am entirely agree with you. :)
> 

I have just confirmed that this is the case, as:

* fd_getpagesize() is only called from os_mem_prealloc(),
* os_mem_prealloc() is called from:
  * host_memory_backend_set_prealloc()
    * Using memory_region_get_fd() as the fd argument
  * host_memory_backend_memory_complete()
    * Using memory_region_get_fd() as the fd argument
  * file_ram_alloc()
    * After qemu_file_get_page_size()/gethugepagesize() was already called
      in the same fd (with errors checked)
* fd_getpagesize() checks for fd == -1
* The only code that sets the fd for a RAMBlock is file_ram_alloc(),
  which checks for qemu_file_get_page_size()/gethugepagesize()
  errors

So, it was already impossible to get os_mem_prealloc() called
with fd != -1 without having gethugepagesize() called first (and
gethugepagesize() already checked for statfs() errors).

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>

Patch
diff mbox

diff --git a/exec.c b/exec.c
index 8af2570..9de38be 100644
--- a/exec.c
+++ b/exec.c
@@ -1174,32 +1174,6 @@  void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
-
-#include <sys/vfs.h>
-
-#define HUGETLBFS_MAGIC       0x958458f6
-
-static long gethugepagesize(const char *path, Error **errp)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(path, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        error_setg_errno(errp, errno, "failed to get page size of file %s",
-                         path);
-        return 0;
-    }
-
-    if (fs.f_type != HUGETLBFS_MAGIC)
-        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
-
-    return fs.f_bsize;
-}
-
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1213,11 +1187,16 @@  static void *file_ram_alloc(RAMBlock *block,
     uint64_t hpagesize;
     Error *local_err = NULL;
 
-    hpagesize = gethugepagesize(path, &local_err);
+    hpagesize = qemu_file_get_page_size(path, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto error;
     }
+
+    if (hpagesize == getpagesize()) {
+        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+    }
+
     block->mr->align = hpagesize;
 
     if (memory < hpagesize) {
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index b568424..dbc17dc 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -302,4 +302,5 @@  int qemu_read_password(char *buf, int buf_size);
  */
 pid_t qemu_fork(Error **errp);
 
+size_t qemu_file_get_page_size(const char *mem_path, Error **errp);
 #endif
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ac70f08..d8760ea 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -308,28 +308,15 @@  static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 
 static long gethugepagesize(const char *mem_path)
 {
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(mem_path, &fs);
-    } while (ret != 0 && errno == EINTR);
+    Error *local_err = NULL;
+    long size = qemu_file_get_page_size(mem_path, local_err);
 
-    if (ret != 0) {
-        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
-                strerror(errno));
+    if (local_err) {
+        error_report_err(local_err);
         exit(1);
     }
 
-#define HUGETLBFS_MAGIC       0x958458f6
-
-    if (fs.f_type != HUGETLBFS_MAGIC) {
-        /* Explicit mempath, but it's ordinary pages */
-        return getpagesize();
-    }
-
-    /* It's hugepage, return the huge page size */
-    return fs.f_bsize;
+    return size;
 }
 
 static int find_max_supported_pagesize(Object *obj, void *opaque)
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 914cef5..51437ff 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -340,7 +340,7 @@  static void sigbus_handler(int signal)
     siglongjmp(sigjump, 1);
 }
 
-static size_t fd_getpagesize(int fd)
+static size_t fd_getpagesize(int fd, Error **errp)
 {
 #ifdef CONFIG_LINUX
     struct statfs fs;
@@ -351,7 +351,12 @@  static size_t fd_getpagesize(int fd)
             ret = fstatfs(fd, &fs);
         } while (ret != 0 && errno == EINTR);
 
-        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+        if (ret) {
+            error_setg_errno(errp, errno, "fstatfs is failed");
+            return 0;
+        }
+
+        if (fs.f_type == HUGETLBFS_MAGIC) {
             return fs.f_bsize;
         }
     }
@@ -360,6 +365,22 @@  static size_t fd_getpagesize(int fd)
     return getpagesize();
 }
 
+size_t qemu_file_get_page_size(const char *path, Error **errp)
+{
+    size_t size = 0;
+    int fd = qemu_open(path, O_RDONLY);
+
+    if (fd < 0) {
+        error_setg_file_open(errp, errno, path);
+        goto exit;
+    }
+
+    size = fd_getpagesize(fd, errp);
+    qemu_close(fd);
+exit:
+    return size;
+}
+
 void os_mem_prealloc(int fd, char *area, size_t memory)
 {
     int ret;
@@ -387,8 +408,16 @@  void os_mem_prealloc(int fd, char *area, size_t memory)
         exit(1);
     } else {
         int i;
-        size_t hpagesize = fd_getpagesize(fd);
-        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+        Error *local_err = NULL;
+        size_t hpagesize = fd_getpagesize(fd, &local_err);
+        size_t numpages;
+
+        if (local_err) {
+            error_report_err(local_err);
+            exit(1);
+        }
+
+        numpages = DIV_ROUND_UP(memory, hpagesize);
 
         /* MAP_POPULATE silently ignores failures */
         for (i = 0; i < numpages; i++) {
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 09f9e98..dada6b6 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -462,6 +462,11 @@  size_t getpagesize(void)
     return system_info.dwPageSize;
 }
 
+size_t qemu_file_get_page_size(const char *path, Error **errp)
+{
+    return getpagesize();
+}
+
 void os_mem_prealloc(int fd, char *area, size_t memory)
 {
     int i;