Message ID | 0b17cd9e914372c4790296b2cc21d6dd6e6d5466.1546399191.git.yi.z.zhang@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | support MAP_SYNC for memory-backend-file | expand |
On Wed, Jan 02, 2019 at 01:26:15PM +0800, Zhang Yi wrote: > When a file supporting DAX is used as vNVDIMM backend, mmap it with > MAP_SYNC flag in addition which can ensure file system metadata > synced in each guest writes to the backend file, without other QEMU > actions (e.g., periodic fsync() by QEMU). > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com> > --- > include/qemu/osdep.h | 16 ++++++++++++++++ > util/mmap-alloc.c | 12 +++++++++++- > 2 files changed, 27 insertions(+), 1 deletion(-) > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h > index 3bf48bc..bb1eba1 100644 > --- a/include/qemu/osdep.h > +++ b/include/qemu/osdep.h > @@ -410,6 +410,22 @@ void qemu_anon_ram_free(void *ptr, size_t size); > # define QEMU_VMALLOC_ALIGN getpagesize() > #endif > > +/* > + * MAP_SHARED_VALIDATE and MAP_SYNC are introduced in Linux kernel > + * 4.15, so they may not be defined when compiling on older kernels. > + */ > +#ifdef CONFIG_LINUX > + > +#include <asm-generic/mman.h> > + > +#ifndef MAP_SYNC > +#define MAP_SYNC 0x0 > +#endif > + > +#else /* !CONFIG_LINUX */ > +#define MAP_SYNC 0x0 > +#endif /* CONFIG_LINUX */ > + > #ifdef CONFIG_POSIX > struct qemu_signalfd_siginfo { > uint32_t ssi_signo; /* Signal number */ > diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > index 8f0a740..a9d5e56 100644 > --- a/util/mmap-alloc.c > +++ b/util/mmap-alloc.c > @@ -99,6 +99,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) > void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > #endif > bool shared = flags & RAM_SHARED; > + bool is_pmem = flags & RAM_PMEM; > + int mmap_xflags = 0; > size_t offset; > void *ptr1; > > @@ -109,13 +111,21 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) > assert(is_power_of_2(align)); > /* Always align to host page size */ > assert(align >= getpagesize()); > + if (shared && is_pmem) { > + mmap_xflags |= 
MAP_SYNC; > + } > > offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > + retry_mmap_fd: > ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > MAP_FIXED | > (fd == -1 ? MAP_ANONYMOUS : 0) | > - (shared ? MAP_SHARED : MAP_PRIVATE), > + (shared ? MAP_SHARED : MAP_PRIVATE) | mmap_xflags, > fd, 0); > + if ((ptr1 == MAP_FAILED) && (mmap_xflags & MAP_SYNC)) { > + mmap_xflags &= ~MAP_SYNC; > + goto retry_mmap_fd; Do we have use cases where using pmem=on without MAP_SYNC isn't going to cause problems? If not, shouldn't we at least print a warning here? Otherwise, won't we still need an option for cases that require MAP_SYNC to be working? > + }
On 2019-01-14 at 17:07:02 -0200, Eduardo Habkost wrote: > On Wed, Jan 02, 2019 at 01:26:15PM +0800, Zhang Yi wrote: > > When a file supporting DAX is used as vNVDIMM backend, mmap it with > > MAP_SYNC flag in addition which can ensure file system metadata > > synced in each guest writes to the backend file, without other QEMU > > actions (e.g., periodic fsync() by QEMU). > > > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com> > > --- > > include/qemu/osdep.h | 16 ++++++++++++++++ > > util/mmap-alloc.c | 12 +++++++++++- > > 2 files changed, 27 insertions(+), 1 deletion(-) > > > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h > > index 3bf48bc..bb1eba1 100644 > > --- a/include/qemu/osdep.h > > +++ b/include/qemu/osdep.h > > @@ -410,6 +410,22 @@ void qemu_anon_ram_free(void *ptr, size_t size); > > # define QEMU_VMALLOC_ALIGN getpagesize() > > #endif > > > > +/* > > + * MAP_SHARED_VALIDATE and MAP_SYNC are introduced in Linux kernel > > + * 4.15, so they may not be defined when compiling on older kernels. 
> > + */ > > +#ifdef CONFIG_LINUX > > + > > +#include <asm-generic/mman.h> > > + > > +#ifndef MAP_SYNC > > +#define MAP_SYNC 0x0 > > +#endif > > + > > +#else /* !CONFIG_LINUX */ > > +#define MAP_SYNC 0x0 > > +#endif /* CONFIG_LINUX */ > > + > > #ifdef CONFIG_POSIX > > struct qemu_signalfd_siginfo { > > uint32_t ssi_signo; /* Signal number */ > > diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > > index 8f0a740..a9d5e56 100644 > > --- a/util/mmap-alloc.c > > +++ b/util/mmap-alloc.c > > @@ -99,6 +99,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) > > void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > > #endif > > bool shared = flags & RAM_SHARED; > > + bool is_pmem = flags & RAM_PMEM; > > + int mmap_xflags = 0; > > size_t offset; > > void *ptr1; > > > > @@ -109,13 +111,21 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) > > assert(is_power_of_2(align)); > > /* Always align to host page size */ > > assert(align >= getpagesize()); > > + if (shared && is_pmem) { > > + mmap_xflags |= MAP_SYNC; > > + } > > > > offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > > + retry_mmap_fd: > > ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > > MAP_FIXED | > > (fd == -1 ? MAP_ANONYMOUS : 0) | > > - (shared ? MAP_SHARED : MAP_PRIVATE), > > + (shared ? MAP_SHARED : MAP_PRIVATE) | mmap_xflags, > > fd, 0); > > + if ((ptr1 == MAP_FAILED) && (mmap_xflags & MAP_SYNC)) { > > + mmap_xflags &= ~MAP_SYNC; > > + goto retry_mmap_fd; > > Do we have use cases where using pmem=on without MAP_SYNC isn't > going to cause problems? If not, shouldn't we at least print a Yes, we have such a case: a DAX device is used directly, rather than a file on a DAX-aware file system. We prefer not to set MAP_SYNC if the user does not have much knowledge about it, because MAP_SYNC may cause some potential performance issues. > warning here? Otherwise, won't we still need an option for cases > that require MAP_SYNC to be working?
> > > + } > > -- > Eduardo
On Tue, Jan 15, 2019 at 10:49:45AM +0800, Yi Zhang wrote: > On 2019-01-14 at 17:07:02 -0200, Eduardo Habkost wrote: > > On Wed, Jan 02, 2019 at 01:26:15PM +0800, Zhang Yi wrote: > > > When a file supporting DAX is used as vNVDIMM backend, mmap it with > > > MAP_SYNC flag in addition which can ensure file system metadata > > > synced in each guest writes to the backend file, without other QEMU > > > actions (e.g., periodic fsync() by QEMU). > > > > > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> > > > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com> > > > --- > > > include/qemu/osdep.h | 16 ++++++++++++++++ > > > util/mmap-alloc.c | 12 +++++++++++- > > > 2 files changed, 27 insertions(+), 1 deletion(-) > > > > > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h > > > index 3bf48bc..bb1eba1 100644 > > > --- a/include/qemu/osdep.h > > > +++ b/include/qemu/osdep.h > > > @@ -410,6 +410,22 @@ void qemu_anon_ram_free(void *ptr, size_t size); > > > # define QEMU_VMALLOC_ALIGN getpagesize() > > > #endif > > > > > > +/* > > > + * MAP_SHARED_VALIDATE and MAP_SYNC are introduced in Linux kernel > > > + * 4.15, so they may not be defined when compiling on older kernels. 
> > > + */ > > > +#ifdef CONFIG_LINUX > > > + > > > +#include <asm-generic/mman.h> > > > + > > > +#ifndef MAP_SYNC > > > +#define MAP_SYNC 0x0 > > > +#endif > > > + > > > +#else /* !CONFIG_LINUX */ > > > +#define MAP_SYNC 0x0 > > > +#endif /* CONFIG_LINUX */ > > > + > > > #ifdef CONFIG_POSIX > > > struct qemu_signalfd_siginfo { > > > uint32_t ssi_signo; /* Signal number */ > > > diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > > > index 8f0a740..a9d5e56 100644 > > > --- a/util/mmap-alloc.c > > > +++ b/util/mmap-alloc.c > > > @@ -99,6 +99,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) > > > void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > > > #endif > > > bool shared = flags & RAM_SHARED; > > > + bool is_pmem = flags & RAM_PMEM; > > > + int mmap_xflags = 0; > > > size_t offset; > > > void *ptr1; > > > > > > @@ -109,13 +111,21 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) > > > assert(is_power_of_2(align)); > > > /* Always align to host page size */ > > > assert(align >= getpagesize()); > > > + if (shared && is_pmem) { > > > + mmap_xflags |= MAP_SYNC; > > > + } > > > > > > offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > > > + retry_mmap_fd: > > > ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > > > MAP_FIXED | > > > (fd == -1 ? MAP_ANONYMOUS : 0) | > > > - (shared ? MAP_SHARED : MAP_PRIVATE), > > > + (shared ? MAP_SHARED : MAP_PRIVATE) | mmap_xflags, > > > fd, 0); > > > + if ((ptr1 == MAP_FAILED) && (mmap_xflags & MAP_SYNC)) { > > > + mmap_xflags &= ~MAP_SYNC; > > > + goto retry_mmap_fd; > > > > Do we have use cases where using pmem=on without MAP_SYNC isn't > > going to cause problems? If not, shouldn't we at least print a > Yes, we have a case that direct use dax device but not a files on > dax aware file system, we prefer to don't set the MAP_SYNC if user > haven't much knowledge about that. 
> it may cause some potential > performance issues with MAP_SYNC. I think you will have to be quite a bit more specific. If there's a performance / functionality tradeoff here then hiding it behind an option with an inscrutable name isn't a good idea. Neither is ignoring failures silently. > > warning here? Otherwise, won't we still need an option for cases > > that require MAP_SYNC to be working? > > > > > + } > > > > -- > > Eduardo
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 3bf48bc..bb1eba1 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -410,6 +410,22 @@ void qemu_anon_ram_free(void *ptr, size_t size); # define QEMU_VMALLOC_ALIGN getpagesize() #endif +/* + * MAP_SHARED_VALIDATE and MAP_SYNC are introduced in Linux kernel + * 4.15, so they may not be defined when compiling on older kernels. + */ +#ifdef CONFIG_LINUX + +#include <asm-generic/mman.h> + +#ifndef MAP_SYNC +#define MAP_SYNC 0x0 +#endif + +#else /* !CONFIG_LINUX */ +#define MAP_SYNC 0x0 +#endif /* CONFIG_LINUX */ + #ifdef CONFIG_POSIX struct qemu_signalfd_siginfo { uint32_t ssi_signo; /* Signal number */ diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index 8f0a740..a9d5e56 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -99,6 +99,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); #endif bool shared = flags & RAM_SHARED; + bool is_pmem = flags & RAM_PMEM; + int mmap_xflags = 0; size_t offset; void *ptr1; @@ -109,13 +111,21 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, uint32_t flags) assert(is_power_of_2(align)); /* Always align to host page size */ assert(align >= getpagesize()); + if (shared && is_pmem) { + mmap_xflags |= MAP_SYNC; + } offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + retry_mmap_fd: ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, MAP_FIXED | (fd == -1 ? MAP_ANONYMOUS : 0) | - (shared ? MAP_SHARED : MAP_PRIVATE), + (shared ? MAP_SHARED : MAP_PRIVATE) | mmap_xflags, fd, 0); + if ((ptr1 == MAP_FAILED) && (mmap_xflags & MAP_SYNC)) { + mmap_xflags &= ~MAP_SYNC; + goto retry_mmap_fd; + } if (ptr1 == MAP_FAILED) { munmap(ptr, total); return MAP_FAILED;