Message ID | 20240131134843.3074922-2-mark.kanda@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Initialize backend memory objects in parallel | expand |
On 31.01.24 14:48, Mark Kanda wrote: > QEMU initializes preallocated backend memory as the objects are parsed from > the command line. This is not optimal in some cases (e.g. memory spanning > multiple NUMA nodes) because the memory objects are initialized in series. > > Allow the initialization to occur in parallel (asynchronously). In order to > ensure optimal thread placement, asynchronous initialization requires prealloc > context threads to be in use. > > Signed-off-by: Mark Kanda <mark.kanda@oracle.com> > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > backends/hostmem.c | 8 ++- > hw/virtio/virtio-mem.c | 4 +- > include/qemu/osdep.h | 18 +++++- > system/vl.c | 8 +++ > util/oslib-posix.c | 131 +++++++++++++++++++++++++++++++---------- > util/oslib-win32.c | 8 ++- > 6 files changed, 140 insertions(+), 37 deletions(-) > > diff --git a/backends/hostmem.c b/backends/hostmem.c > index 30f69b2cb5..8f602dc86f 100644 > --- a/backends/hostmem.c > +++ b/backends/hostmem.c > @@ -20,6 +20,7 @@ > #include "qom/object_interfaces.h" > #include "qemu/mmap-alloc.h" > #include "qemu/madvise.h" > +#include "hw/qdev-core.h" > > #ifdef CONFIG_NUMA > #include <numaif.h> > @@ -235,9 +236,10 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, > int fd = memory_region_get_fd(&backend->mr); > void *ptr = memory_region_get_ram_ptr(&backend->mr); > uint64_t sz = memory_region_size(&backend->mr); > + bool async = !phase_check(PHASE_MACHINE_INITIALIZED); > > if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, > - backend->prealloc_context, errp)) { > + backend->prealloc_context, async, errp)) { > return; > } I think we will never trigger that case: we would have to set the property after the device was already initialized, which shouldn't happen. So I guess we can simplify and drop that. > backend->prealloc = true; [...] 
> +++ b/include/qemu/osdep.h > @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; > * @area: start address of the are to preallocate > * @sz: the size of the area to preallocate > * @max_threads: maximum number of threads to use > + * @tc: prealloc context threads pointer, NULL if not in use > + * @async: request asynchronous preallocation, requires @tc > * @errp: returns an error if this function fails > * > * Preallocate memory (populate/prefault page tables writable) for the virtual > @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; > * each page in the area was faulted in writable at least once, for example, > * after allocating file blocks for mapped files. > * > + * When setting @async, allocation might be performed asynchronously. > + * qemu_finish_async_mem_prealloc() must be called to finish any asynchronous > + * preallocation. > + * > * Return: true on success, else false setting @errp with error. > */ > bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, > - ThreadContext *tc, Error **errp); > + ThreadContext *tc, bool async, Error **errp); > + > +/** > + * qemu_finish_async_mem_prealloc: > + * @errp: returns an error if this function fails > + * > + * Finish all outstanding asynchronous memory preallocation. > + * > + * Return: true on success, else false setting @errp with error. > + */ > +bool qemu_finish_async_mem_prealloc(Error **errp); Suboptimal suggestion from my side, guess it would be better to call this "qemu_finish_async_prealloc_mem" to match "qemu_prealloc_mem" > > /** > * qemu_get_pid_name: > diff --git a/system/vl.c b/system/vl.c > index 788d88ea03..290bb3232b 100644 > --- a/system/vl.c > +++ b/system/vl.c > @@ -2009,6 +2009,14 @@ static void qemu_create_late_backends(void) > > object_option_foreach_add(object_create_late); > > + /* > + * Wait for any outstanding memory prealloc from created memory > + * backends to complete. 
> + */ > + if (!qemu_finish_async_mem_prealloc(&error_fatal)) { > + exit(1); > + } > + I'm wondering if we should have a new phase instead, like PHASE_LATE_OBJECTS_CREATED. and do here phase_advance(PHASE_LATE_OBJECTS_CREATED); and use that instead. Currently, there is a "gap" between both things. I don't think anything is actually broken right now (because any internal memory backend wouldn't have a thread context), but it might be much cleaner and more obvious that way. Apart from that LGTM!
On 1/31/24 8:04 AM, David Hildenbrand wrote: > On 31.01.24 14:48, Mark Kanda wrote: >> QEMU initializes preallocated backend memory as the objects are >> parsed from >> the command line. This is not optimal in some cases (e.g. memory >> spanning >> multiple NUMA nodes) because the memory objects are initialized in >> series. >> >> Allow the initialization to occur in parallel (asynchronously). In >> order to >> ensure optimal thread placement, asynchronous initialization requires >> prealloc >> context threads to be in use. >> >> Signed-off-by: Mark Kanda <mark.kanda@oracle.com> >> Signed-off-by: David Hildenbrand <david@redhat.com> >> --- >> backends/hostmem.c | 8 ++- >> hw/virtio/virtio-mem.c | 4 +- >> include/qemu/osdep.h | 18 +++++- >> system/vl.c | 8 +++ >> util/oslib-posix.c | 131 +++++++++++++++++++++++++++++++---------- >> util/oslib-win32.c | 8 ++- >> 6 files changed, 140 insertions(+), 37 deletions(-) >> >> diff --git a/backends/hostmem.c b/backends/hostmem.c >> index 30f69b2cb5..8f602dc86f 100644 >> --- a/backends/hostmem.c >> +++ b/backends/hostmem.c >> @@ -20,6 +20,7 @@ >> #include "qom/object_interfaces.h" >> #include "qemu/mmap-alloc.h" >> #include "qemu/madvise.h" >> +#include "hw/qdev-core.h" >> #ifdef CONFIG_NUMA >> #include <numaif.h> >> @@ -235,9 +236,10 @@ static void >> host_memory_backend_set_prealloc(Object *obj, bool value, >> int fd = memory_region_get_fd(&backend->mr); >> void *ptr = memory_region_get_ram_ptr(&backend->mr); >> uint64_t sz = memory_region_size(&backend->mr); >> + bool async = !phase_check(PHASE_MACHINE_INITIALIZED); >> if (!qemu_prealloc_mem(fd, ptr, sz, >> backend->prealloc_threads, >> - backend->prealloc_context, errp)) { >> + backend->prealloc_context, async, >> errp)) { >> return; >> } > > I think we will never trigger that case: we would have to set the > propertly after the device was already initialized, which shouldn't > happen. > > So I guess we can simplify and drop that. > Will fix. 
>> backend->prealloc = true; > > > [...] > >> +++ b/include/qemu/osdep.h >> @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; >> * @area: start address of the are to preallocate >> * @sz: the size of the area to preallocate >> * @max_threads: maximum number of threads to use >> + * @tc: prealloc context threads pointer, NULL if not in use >> + * @async: request asynchronous preallocation, requires @tc >> * @errp: returns an error if this function fails >> * >> * Preallocate memory (populate/prefault page tables writable) for >> the virtual >> @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; >> * each page in the area was faulted in writable at least once, for >> example, >> * after allocating file blocks for mapped files. >> * >> + * When setting @async, allocation might be performed asynchronously. >> + * qemu_finish_async_mem_prealloc() must be called to finish any >> asynchronous >> + * preallocation. >> + * >> * Return: true on success, else false setting @errp with error. >> */ >> bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, >> - ThreadContext *tc, Error **errp); >> + ThreadContext *tc, bool async, Error **errp); >> + >> +/** >> + * qemu_finish_async_mem_prealloc: >> + * @errp: returns an error if this function fails >> + * >> + * Finish all outstanding asynchronous memory preallocation. >> + * >> + * Return: true on success, else false setting @errp with error. >> + */ >> +bool qemu_finish_async_mem_prealloc(Error **errp); > > Suboptimal suggestion from my side, guess it woud be better to call this > > "qemu_finish_async_prealloc_mem" to match "qemu_prealloc_mem" > Will fix. 
>> /** >> * qemu_get_pid_name: >> diff --git a/system/vl.c b/system/vl.c >> index 788d88ea03..290bb3232b 100644 >> --- a/system/vl.c >> +++ b/system/vl.c >> @@ -2009,6 +2009,14 @@ static void qemu_create_late_backends(void) >> object_option_foreach_add(object_create_late); >> + /* >> + * Wait for any outstanding memory prealloc from created memory >> + * backends to complete. >> + */ >> + if (!qemu_finish_async_mem_prealloc(&error_fatal)) { >> + exit(1); >> + } >> + > > I'm wondering if we should have a new phase instead, like > > PHASE_LATE_OBJECTS_CREATED. > > and do here > > phase_advance(PHASE_LATE_OBJECTS_CREATED); > > and use that instead. Currently, there is a "gap" between both things. > I don't think anything is actually broken right now (because any > internal memory abckend wouldn't have a thread context), but it might > be much cleaner and obvious that way. > OK. I'll call it 'PHASE_LATE_BACKENDS_CREATED' (to make it consistent with code comments/function name). > Apart from that LGTM! > Thanks/regards, -Mark
> > OK. I'll call it 'PHASE_LATE_BACKENDS_CREATED' (to make it consistent > with code comments/function name). But then, you should set it at the very end of the function (not sure if that would be a problem with the other devices that are getting created in between -- if they would be using one of these memory backends; likely not).
On 1/31/24 8:30 AM, David Hildenbrand wrote: >> >> OK. I'll call it 'PHASE_LATE_BACKENDS_CREATED' (to make it consistent >> with code comments/function name). > > But then, you should set it at the very end of the function (not sure > if that would be a problem with the other devices that are getting > created in between -- if they would be using one of these memory > backends; likely not). > I think I misunderstood your suggestion. I was planning to add a 'phase_advance(PHASE_LATE_BACKENDS_CREATED)' to qemu_init(): @@ -3703,6 +3703,7 @@ void qemu_init(int argc, char **argv) * over memory-backend-file objects). */ qemu_create_late_backends(); + phase_advance(PHASE_LATE_BACKENDS_CREATED); And use PHASE_LATE_BACKENDS_CREATED (instead of PHASE_MACHINE_INITIALIZED) for the async bool in host_memory_backend_memory_complete(). I was planning to leave this call where it is: @@ -2009,6 +2009,14 @@ static void qemu_create_late_backends(void) object_option_foreach_add(object_create_late); + /* + * Wait for any outstanding memory prealloc from created memory + * backends to complete. + */ + if (!qemu_finish_async_mem_prealloc(&error_fatal)) { + exit(1); + } + if (tpm_init() < 0) { exit(1); } Is this what you had in mind? Thanks/regards, -Mark
On 31.01.24 15:48, Mark Kanda wrote: > On 1/31/24 8:30 AM, David Hildenbrand wrote: >>> >>> OK. I'll call it 'PHASE_LATE_BACKENDS_CREATED' (to make it consistent >>> with code comments/function name). >> >> But then, you should set it at the very end of the function (not sure >> if that would be a problem with the other devices that are getting >> created in between -- if they would be using one of these memory >> backends; likely not). >> > > I think I misunderstood your suggestion. I was planning to add it a > 'phase_advance(PHASE_LATE_BACKENDS_CREATED)' to qemu_init(): > > @@ -3703,6 +3703,7 @@ void qemu_init(int argc, char **argv) > * over memory-backend-file objects). > */ > qemu_create_late_backends(); > + phase_advance(PHASE_LATE_BACKENDS_CREATED); > > And use PHASE_LATE_BACKENDS_CREATED (instead of > PHASE_MACHINE_INITIALIZED) for the async bool in > host_memory_backend_memory_complete(). > > I was planning to leave this call where it is: > > @@ -2009,6 +2009,14 @@ static void qemu_create_late_backends(void) > > object_option_foreach_add(object_create_late); > > + /* > + * Wait for any outstanding memory prealloc from created memory > + * backends to complete. > + */ > + if (!qemu_finish_async_mem_prealloc(&error_fatal)) { > + exit(1); > + } > + > if (tpm_init() < 0) { > exit(1); > } > Yes. The only "suboptimal" thing is that if someone were to create a memory backend between qemu_finish_async_mem_prealloc() and phase_advance(PHASE_LATE_BACKENDS_CREATED), it would never get preallocated. That likely won't ever happen by any of the remaining stuff in qemu_create_late_backends(), especially not with "prealloc=on" and thread-contexts set.
On 1/31/24 8:57 AM, David Hildenbrand wrote: > On 31.01.24 15:48, Mark Kanda wrote: >> On 1/31/24 8:30 AM, David Hildenbrand wrote: >>>> >>>> OK. I'll call it 'PHASE_LATE_BACKENDS_CREATED' (to make it consistent >>>> with code comments/function name). >>> >>> But then, you should set it at the very end of the function (not sure >>> if that would be a problem with the other devices that are getting >>> created in between -- if they would be using one of these memory >>> backends; likely not). >>> >> >> I think I misunderstood your suggestion. I was planning to add it a >> 'phase_advance(PHASE_LATE_BACKENDS_CREATED)' to qemu_init(): >> >> @@ -3703,6 +3703,7 @@ void qemu_init(int argc, char **argv) >> * over memory-backend-file objects). >> */ >> qemu_create_late_backends(); >> + phase_advance(PHASE_LATE_BACKENDS_CREATED); >> >> And use PHASE_LATE_BACKENDS_CREATED (instead of >> PHASE_MACHINE_INITIALIZED) for the async bool in >> host_memory_backend_memory_complete(). >> >> I was planning to leave this call where it is: >> >> @@ -2009,6 +2009,14 @@ static void qemu_create_late_backends(void) >> >> object_option_foreach_add(object_create_late); >> >> + /* >> + * Wait for any outstanding memory prealloc from created >> memory >> + * backends to complete. >> + */ >> + if (!qemu_finish_async_mem_prealloc(&error_fatal)) { >> + exit(1); >> + } >> + >> if (tpm_init() < 0) { >> exit(1); >> } >> > > Yes. The only "suboptimal" things is that if someone where to create a > memory backend between qemu_finish_async_mem_prealloc() and > phase_advance(PHASE_LATE_BACKENDS_CREATED), it would never get > preallocated. > > That likely won't ever happen by any of the remaining stuff in > qemu_create_late_backends(), especially not with "prealloc=on" and > thread-contexts set. > Yep. OK, I'll go with that. Thanks again!
diff --git a/backends/hostmem.c b/backends/hostmem.c index 30f69b2cb5..8f602dc86f 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -20,6 +20,7 @@ #include "qom/object_interfaces.h" #include "qemu/mmap-alloc.h" #include "qemu/madvise.h" +#include "hw/qdev-core.h" #ifdef CONFIG_NUMA #include <numaif.h> @@ -235,9 +236,10 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, int fd = memory_region_get_fd(&backend->mr); void *ptr = memory_region_get_ram_ptr(&backend->mr); uint64_t sz = memory_region_size(&backend->mr); + bool async = !phase_check(PHASE_MACHINE_INITIALIZED); if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, - backend->prealloc_context, errp)) { + backend->prealloc_context, async, errp)) { return; } backend->prealloc = true; @@ -323,6 +325,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); void *ptr; uint64_t sz; + bool async = !phase_check(PHASE_MACHINE_INITIALIZED); if (!bc->alloc) { return; @@ -398,7 +401,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, backend->prealloc_threads, - backend->prealloc_context, errp)) { + backend->prealloc_context, + async, errp)) { return; } } diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 99ab989852..ffd119ebac 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; - if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { + if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { static bool warned; /* @@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; - if 
(!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { + if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { error_report_err(local_err); return -ENOMEM; } diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index c9692cc314..f45954b512 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; * @area: start address of the are to preallocate * @sz: the size of the area to preallocate * @max_threads: maximum number of threads to use + * @tc: prealloc context threads pointer, NULL if not in use + * @async: request asynchronous preallocation, requires @tc * @errp: returns an error if this function fails * * Preallocate memory (populate/prefault page tables writable) for the virtual @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; * each page in the area was faulted in writable at least once, for example, * after allocating file blocks for mapped files. * + * When setting @async, allocation might be performed asynchronously. + * qemu_finish_async_mem_prealloc() must be called to finish any asynchronous + * preallocation. + * * Return: true on success, else false setting @errp with error. */ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp); + ThreadContext *tc, bool async, Error **errp); + +/** + * qemu_finish_async_mem_prealloc: + * @errp: returns an error if this function fails + * + * Finish all outstanding asynchronous memory preallocation. + * + * Return: true on success, else false setting @errp with error. 
+ */ +bool qemu_finish_async_mem_prealloc(Error **errp); /** * qemu_get_pid_name: diff --git a/system/vl.c b/system/vl.c index 788d88ea03..290bb3232b 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2009,6 +2009,14 @@ static void qemu_create_late_backends(void) object_option_foreach_add(object_create_late); + /* + * Wait for any outstanding memory prealloc from created memory + * backends to complete. + */ + if (!qemu_finish_async_mem_prealloc(&error_fatal)) { + exit(1); + } + if (tpm_init() < 0) { exit(1); } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 7c297003b9..74493e3cf7 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -42,6 +42,7 @@ #include "qemu/cutils.h" #include "qemu/units.h" #include "qemu/thread-context.h" +#include "qemu/main-loop.h" #ifdef CONFIG_LINUX #include <sys/syscall.h> @@ -63,11 +64,15 @@ struct MemsetThread; +static QLIST_HEAD(, MemsetContext) memset_contexts = + QLIST_HEAD_INITIALIZER(memset_contexts); + typedef struct MemsetContext { bool all_threads_created; bool any_thread_failed; struct MemsetThread *threads; int num_threads; + QLIST_ENTRY(MemsetContext) next; } MemsetContext; struct MemsetThread { @@ -412,19 +417,44 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, return ret; } +static int wait_and_free_mem_prealloc_context(MemsetContext *context) +{ + int i, ret = 0, tmp; + + for (i = 0; i < context->num_threads; i++) { + tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread); + + if (tmp) { + ret = tmp; + } + } + g_free(context->threads); + g_free(context); + return ret; +} + static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, - int max_threads, ThreadContext *tc, + int max_threads, ThreadContext *tc, bool async, bool use_madv_populate_write) { static gsize initialized = 0; - MemsetContext context = { - .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads), - }; + MemsetContext *context = g_malloc0(sizeof(MemsetContext)); size_t 
numpages_per_thread, leftover; void *(*touch_fn)(void *); - int ret = 0, i = 0; + int ret, i = 0; char *addr = area; + /* + * Asynchronous preallocation is only allowed when using MADV_POPULATE_WRITE + * and prealloc context for thread placement. + */ + if (!use_madv_populate_write || !tc) { + async = false; + } + + context->num_threads = + get_memset_num_threads(hpagesize, numpages, max_threads); + if (g_once_init_enter(&initialized)) { qemu_mutex_init(&page_mutex); qemu_cond_init(&page_cond); @@ -432,8 +462,11 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, } if (use_madv_populate_write) { - /* Avoid creating a single thread for MADV_POPULATE_WRITE */ - if (context.num_threads == 1) { + /* + * Avoid creating a single thread for MADV_POPULATE_WRITE when + * preallocating synchronously. + */ + if (context->num_threads == 1 && !async) { if (qemu_madvise(area, hpagesize * numpages, QEMU_MADV_POPULATE_WRITE)) { return -errno; @@ -445,50 +478,86 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, touch_fn = do_touch_pages; } - context.threads = g_new0(MemsetThread, context.num_threads); - numpages_per_thread = numpages / context.num_threads; - leftover = numpages % context.num_threads; - for (i = 0; i < context.num_threads; i++) { - context.threads[i].addr = addr; - context.threads[i].numpages = numpages_per_thread + (i < leftover); - context.threads[i].hpagesize = hpagesize; - context.threads[i].context = &context; + context->threads = g_new0(MemsetThread, context->num_threads); + numpages_per_thread = numpages / context->num_threads; + leftover = numpages % context->num_threads; + for (i = 0; i < context->num_threads; i++) { + context->threads[i].addr = addr; + context->threads[i].numpages = numpages_per_thread + (i < leftover); + context->threads[i].hpagesize = hpagesize; + context->threads[i].context = context; if (tc) { - thread_context_create_thread(tc, &context.threads[i].pgthread, + 
thread_context_create_thread(tc, &context->threads[i].pgthread, "touch_pages", - touch_fn, &context.threads[i], + touch_fn, &context->threads[i], QEMU_THREAD_JOINABLE); } else { - qemu_thread_create(&context.threads[i].pgthread, "touch_pages", - touch_fn, &context.threads[i], + qemu_thread_create(&context->threads[i].pgthread, "touch_pages", + touch_fn, &context->threads[i], QEMU_THREAD_JOINABLE); } - addr += context.threads[i].numpages * hpagesize; + addr += context->threads[i].numpages * hpagesize; + } + + if (async) { + /* + * async requests currently require the BQL. Add it to the list and kick + * preallocation off during qemu_finish_async_mem_prealloc(). + */ + assert(bql_locked()); + QLIST_INSERT_HEAD(&memset_contexts, context, next); + return 0; } if (!use_madv_populate_write) { - sigbus_memset_context = &context; + sigbus_memset_context = context; } qemu_mutex_lock(&page_mutex); - context.all_threads_created = true; + context->all_threads_created = true; qemu_cond_broadcast(&page_cond); qemu_mutex_unlock(&page_mutex); - for (i = 0; i < context.num_threads; i++) { - int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread); + ret = wait_and_free_mem_prealloc_context(context); + if (!use_madv_populate_write) { + sigbus_memset_context = NULL; + } + return ret; +} + +bool qemu_finish_async_mem_prealloc(Error **errp) +{ + int ret, tmp; + MemsetContext *context, *next_context; + + /* Waiting for preallocation requires the BQL. 
*/ + assert(bql_locked()); + if (QLIST_EMPTY(&memset_contexts)) { + return true; + } + + qemu_mutex_lock(&page_mutex); + QLIST_FOREACH(context, &memset_contexts, next) { + context->all_threads_created = true; + } + qemu_cond_broadcast(&page_cond); + qemu_mutex_unlock(&page_mutex); + + QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) { + QLIST_REMOVE(context, next); + tmp = wait_and_free_mem_prealloc_context(context); if (tmp) { ret = tmp; } } - if (!use_madv_populate_write) { - sigbus_memset_context = NULL; + if (ret) { + error_setg_errno(errp, -ret, + "qemu_prealloc_mem: preallocating memory failed"); + return false; } - g_free(context.threads); - - return ret; + return true; } static bool madv_populate_write_possible(char *area, size_t pagesize) @@ -498,7 +567,7 @@ static bool madv_populate_write_possible(char *area, size_t pagesize) } bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp) + ThreadContext *tc, bool async, Error **errp) { static gsize initialized; int ret; @@ -540,7 +609,7 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, } /* touch pages simultaneously */ - ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, + ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async, use_madv_populate_write); if (ret) { error_setg_errno(errp, -ret, diff --git a/util/oslib-win32.c b/util/oslib-win32.c index c4a5f05a49..107f0efe37 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -265,7 +265,7 @@ int getpagesize(void) } bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp) + ThreadContext *tc, bool async, Error **errp) { int i; size_t pagesize = qemu_real_host_page_size(); @@ -278,6 +278,12 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, return true; } +bool qemu_finish_async_mem_prealloc(Error **errp) +{ + /* async prealloc not supported, there is nothing to 
finish */ + return true; +} + char *qemu_get_pid_name(pid_t pid) { /* XXX Implement me */