Message ID | 20180312172124.56461-10-dgilbert@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Mar 12, 2018 at 05:21:04PM +0000, Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > Allow other userfaultfd's to be registered into the fault thread > so that handlers for shared memory can get responses. > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > Reviewed-by: Peter Xu <peterx@redhat.com> new blank line at EOF errors. > --- > migration/migration.c | 6 ++ > migration/migration.h | 2 + > migration/postcopy-ram.c | 210 +++++++++++++++++++++++++++++++++++------------ > migration/postcopy-ram.h | 21 +++++ > migration/trace-events | 2 + > 5 files changed, 188 insertions(+), 53 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 6a4780ef6f..1f22f463d3 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void) > if (!once) { > mis_current.state = MIGRATION_STATUS_NONE; > memset(&mis_current, 0, sizeof(MigrationIncomingState)); > + mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE, > + sizeof(struct PostCopyFD)); > qemu_mutex_init(&mis_current.rp_mutex); > qemu_event_init(&mis_current.main_thread_load_event, false); > once = true; > @@ -177,6 +179,10 @@ void migration_incoming_state_destroy(void) > qemu_fclose(mis->from_src_file); > mis->from_src_file = NULL; > } > + if (mis->postcopy_remote_fds) { > + g_array_free(mis->postcopy_remote_fds, TRUE); > + mis->postcopy_remote_fds = NULL; > + } > > qemu_event_reset(&mis->main_thread_load_event); > } > diff --git a/migration/migration.h b/migration/migration.h > index 08c5d2ded1..d02a759331 100644 > --- a/migration/migration.h > +++ b/migration/migration.h > @@ -51,6 +51,8 @@ struct MigrationIncomingState { > QemuMutex rp_mutex; /* We send replies from multiple threads */ > void *postcopy_tmp_page; > void *postcopy_tmp_zero_page; > + /* PostCopyFD's for external userfaultfds & handlers of shared memory */ > + GArray *postcopy_remote_fds; > > QEMUBH *bh; > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index 1089814d54..4ab1b7d36d 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque) > MigrationIncomingState *mis = opaque; > struct uffd_msg msg; > int ret; > + size_t index; > RAMBlock *rb = NULL; > RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ > > trace_postcopy_ram_fault_thread_entry(); > qemu_sem_post(&mis->fault_thread_sem); > > + struct pollfd *pfd; > + size_t pfd_len = 2 + mis->postcopy_remote_fds->len; > + > + pfd = g_new0(struct pollfd, pfd_len); > + > + pfd[0].fd = mis->userfault_fd; > + pfd[0].events = POLLIN; > + pfd[1].fd = mis->userfault_event_fd; > + pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ > + trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd); > + for (index = 0; index < mis->postcopy_remote_fds->len; index++) { > + struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, > + struct PostCopyFD, index); > + pfd[2 + index].fd = pcfd->fd; > + pfd[2 + index].events = POLLIN; > + trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr, > + pcfd->fd); > + } > + > while (true) { > ram_addr_t rb_offset; > - struct pollfd pfd[2]; > + int poll_result; > > /* > * We're mainly waiting for the kernel to give us a faulting HVA, > * however we can be told to quit via userfault_quit_fd which is > * an eventfd > */ > - pfd[0].fd = mis->userfault_fd; > - pfd[0].events = POLLIN; > - pfd[0].revents = 0; > - pfd[1].fd = mis->userfault_event_fd; > - pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ > - pfd[1].revents = 0; > - > - if (poll(pfd, 2, -1 /* Wait forever */) == -1) { > + > + poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); > + if (poll_result == -1) { > error_report("%s: userfault poll: %s", __func__, strerror(errno)); > break; > } > @@ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque) > } > } > > - ret = read(mis->userfault_fd, &msg, sizeof(msg)); > - if (ret != sizeof(msg)) { > - if (errno == EAGAIN) { > - /* > - * if a wake up happens on the other thread just after > - * the poll, there is nothing to read. > - */ > - continue; > + if (pfd[0].revents) { > + poll_result--; > + ret = read(mis->userfault_fd, &msg, sizeof(msg)); > + if (ret != sizeof(msg)) { > + if (errno == EAGAIN) { > + /* > + * if a wake up happens on the other thread just after > + * the poll, there is nothing to read. > + */ > + continue; > + } > + if (ret < 0) { > + error_report("%s: Failed to read full userfault " > + "message: %s", > + __func__, strerror(errno)); > + break; > + } else { > + error_report("%s: Read %d bytes from userfaultfd " > + "expected %zd", > + __func__, ret, sizeof(msg)); > + break; /* Lost alignment, don't know what we'd read next */ > + } > } > - if (ret < 0) { > - error_report("%s: Failed to read full userfault message: %s", > - __func__, strerror(errno)); > - break; > - } else { > - error_report("%s: Read %d bytes from userfaultfd expected %zd", > - __func__, ret, sizeof(msg)); > - break; /* Lost alignment, don't know what we'd read next */ > + if (msg.event != UFFD_EVENT_PAGEFAULT) { > + error_report("%s: Read unexpected event %ud from userfaultfd", > + __func__, msg.event); > + continue; /* It's not a page fault, shouldn't happen */ > } > - } > - if (msg.event != UFFD_EVENT_PAGEFAULT) { > - error_report("%s: Read unexpected event %ud from userfaultfd", > - __func__, msg.event); > - continue; /* It's not a page fault, shouldn't happen */ > - } > > - rb = qemu_ram_block_from_host( > - (void *)(uintptr_t)msg.arg.pagefault.address, > - true, &rb_offset); > - if (!rb) { > - error_report("postcopy_ram_fault_thread: Fault outside guest: %" > - PRIx64, (uint64_t)msg.arg.pagefault.address); > - break; > - } > + rb = qemu_ram_block_from_host( > + (void *)(uintptr_t)msg.arg.pagefault.address, > + true, &rb_offset); > + if (!rb) { > + error_report("postcopy_ram_fault_thread: Fault outside guest: %" > + PRIx64, (uint64_t)msg.arg.pagefault.address); > + break; > + } > > - rb_offset &= ~(qemu_ram_pagesize(rb) - 1); > - trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, > + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); > + trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, > qemu_ram_get_idstr(rb), > rb_offset); > + /* > + * Send the request to the source - we want to request one > + * of our host page sizes (which is >= TPS) > + */ > + if (rb != last_rb) { > + last_rb = rb; > + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), > + rb_offset, qemu_ram_pagesize(rb)); > + } else { > + /* Save some space */ > + migrate_send_rp_req_pages(mis, NULL, > + rb_offset, qemu_ram_pagesize(rb)); > + } > + } > > - /* > - * Send the request to the source - we want to request one > - * of our host page sizes (which is >= TPS) > - */ > - if (rb != last_rb) { > - last_rb = rb; > - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), > - rb_offset, qemu_ram_pagesize(rb)); > - } else { > - /* Save some space */ > - migrate_send_rp_req_pages(mis, NULL, > - rb_offset, qemu_ram_pagesize(rb)); > + /* Now handle any requests from external processes on shared memory */ > + /* TODO: May need to handle devices deregistering during postcopy */ > + for (index = 2; index < pfd_len && poll_result; index++) { > + if (pfd[index].revents) { > + struct PostCopyFD *pcfd = > + &g_array_index(mis->postcopy_remote_fds, > + struct PostCopyFD, index - 2); > + > + poll_result--; > + if (pfd[index].revents & POLLERR) { > + error_report("%s: POLLERR on poll %zd fd=%d", > + __func__, index, pcfd->fd); > + pfd[index].events = 0; > + continue; > + } > + > + ret = read(pcfd->fd, &msg, sizeof(msg)); > + if (ret != sizeof(msg)) { > + if (errno == EAGAIN) { > + /* > + * if a wake up happens on the other thread just after > + * the poll, there is nothing to read. > + */ > + continue; > + } > + if (ret < 0) { > + error_report("%s: Failed to read full userfault " > + "message: %s (shared) revents=%d", > + __func__, strerror(errno), > + pfd[index].revents); > + /*TODO: Could just disable this sharer */ > + break; > + } else { > + error_report("%s: Read %d bytes from userfaultfd " > + "expected %zd (shared)", > + __func__, ret, sizeof(msg)); > + /*TODO: Could just disable this sharer */ > + break; /*Lost alignment,don't know what we'd read next*/ > + } > + } > + if (msg.event != UFFD_EVENT_PAGEFAULT) { > + error_report("%s: Read unexpected event %ud " > + "from userfaultfd (shared)", > + __func__, msg.event); > + continue; /* It's not a page fault, shouldn't happen */ > + } > + /* Call the device handler registered with us */ > + ret = pcfd->handler(pcfd, &msg); > + if (ret) { > + error_report("%s: Failed to resolve shared fault on %zd/%s", > + __func__, index, pcfd->idstr); > + /* TODO: Fail? Disable this sharer? */ > + } > + } > } > } > trace_postcopy_ram_fault_thread_exit(); > @@ -970,3 +1045,32 @@ PostcopyState postcopy_state_set(PostcopyState new_state) > { > return atomic_xchg(&incoming_postcopy_state, new_state); > } > + > +/* Register a handler for external shared memory postcopy > + * called on the destination. > + */ > +void postcopy_register_shared_ufd(struct PostCopyFD *pcfd) > +{ > + MigrationIncomingState *mis = migration_incoming_get_current(); > + > + mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds, > + *pcfd); > +} > + > +/* Unregister a handler for external shared memory postcopy > + */ > +void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) > +{ > + guint i; > + MigrationIncomingState *mis = migration_incoming_get_current(); > + GArray *pcrfds = mis->postcopy_remote_fds; > + > + for (i = 0; i < pcrfds->len; i++) { > + struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i); > + if (cur->fd == pcfd->fd) { > + mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i); > + return; > + } > + } > +} > + > diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h > index 0421c98d57..f21eef6702 100644 > --- a/migration/postcopy-ram.h > +++ b/migration/postcopy-ram.h > @@ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n); > /* Call the notifier list set by postcopy_add_start_notifier */ > int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp); > > +struct PostCopyFD; > + > +/* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */ > +typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd); > + > +struct PostCopyFD { > + int fd; > + /* Data to pass to handler */ > + void *data; > + /* Handler to be called whenever we get a poll event */ > + pcfdhandler handler; > + /* A string to use in error messages */ > + const char *idstr; > +}; > + > +/* Register a userfaultfd owned by an external process for > + * shared memory. > + */ > +void postcopy_register_shared_ufd(struct PostCopyFD *pcfd); > +void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd); > + > #endif > diff --git a/migration/trace-events b/migration/trace-events > index 93961dea16..1e617ad7a6 100644 > --- a/migration/trace-events > +++ b/migration/trace-events > @@ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p" > postcopy_ram_enable_notify(void) "" > postcopy_ram_fault_thread_entry(void) "" > postcopy_ram_fault_thread_exit(void) "" > +postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d" > +postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d" > postcopy_ram_fault_thread_quit(void) "" > postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx" > postcopy_ram_incoming_cleanup_closeuf(void) "" > -- > 2.14.3
* Michael S. Tsirkin (mst@redhat.com) wrote: > On Mon, Mar 12, 2018 at 05:21:04PM +0000, Dr. David Alan Gilbert (git) wrote: > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > > > Allow other userfaultfd's to be registered into the fault thread > > so that handlers for shared memory can get responses. > > > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > Reviewed-by: Peter Xu <peterx@redhat.com> > > new blank line at EOF errors. Done. Dave > > --- > > migration/migration.c | 6 ++ > > migration/migration.h | 2 + > > migration/postcopy-ram.c | 210 +++++++++++++++++++++++++++++++++++------------ > > migration/postcopy-ram.h | 21 +++++ > > migration/trace-events | 2 + > > 5 files changed, 188 insertions(+), 53 deletions(-) > > > > diff --git a/migration/migration.c b/migration/migration.c > > index 6a4780ef6f..1f22f463d3 100644 > > --- a/migration/migration.c > > +++ b/migration/migration.c > > @@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void) > > if (!once) { > > mis_current.state = MIGRATION_STATUS_NONE; > > memset(&mis_current, 0, sizeof(MigrationIncomingState)); > > + mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE, > > + sizeof(struct PostCopyFD)); > > qemu_mutex_init(&mis_current.rp_mutex); > > qemu_event_init(&mis_current.main_thread_load_event, false); > > once = true; > > @@ -177,6 +179,10 @@ void migration_incoming_state_destroy(void) > > qemu_fclose(mis->from_src_file); > > mis->from_src_file = NULL; > > } > > + if (mis->postcopy_remote_fds) { > > + g_array_free(mis->postcopy_remote_fds, TRUE); > > + mis->postcopy_remote_fds = NULL; > > + } > > > > qemu_event_reset(&mis->main_thread_load_event); > > } > > diff --git a/migration/migration.h b/migration/migration.h > > index 08c5d2ded1..d02a759331 100644 > > --- a/migration/migration.h > > +++ b/migration/migration.h > > @@ -51,6 +51,8 @@ struct MigrationIncomingState { > > QemuMutex rp_mutex; /* We send replies from multiple threads */ > > void *postcopy_tmp_page; > > void *postcopy_tmp_zero_page; > > + /* PostCopyFD's for external userfaultfds & handlers of shared memory */ > > + GArray *postcopy_remote_fds; > > > > QEMUBH *bh; > > > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > > index 1089814d54..4ab1b7d36d 100644 > > --- a/migration/postcopy-ram.c > > +++ b/migration/postcopy-ram.c > > @@ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque) > > MigrationIncomingState *mis = opaque; > > struct uffd_msg msg; > > int ret; > > + size_t index; > > RAMBlock *rb = NULL; > > RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ > > > > trace_postcopy_ram_fault_thread_entry(); > > qemu_sem_post(&mis->fault_thread_sem); > > > > + struct pollfd *pfd; > > + size_t pfd_len = 2 + mis->postcopy_remote_fds->len; > > + > > + pfd = g_new0(struct pollfd, pfd_len); > > + > > + pfd[0].fd = mis->userfault_fd; > > + pfd[0].events = POLLIN; > > + pfd[1].fd = mis->userfault_event_fd; > > + pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ > > + trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd); > > + for (index = 0; index < mis->postcopy_remote_fds->len; index++) { > > + struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, > > + struct PostCopyFD, index); > > + pfd[2 + index].fd = pcfd->fd; > > + pfd[2 + index].events = POLLIN; > > + trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr, > > + pcfd->fd); > > + } > > + > > while (true) { > > ram_addr_t rb_offset; > > - struct pollfd pfd[2]; > > + int poll_result; > > > > /* > > * We're mainly waiting for the kernel to give us a faulting HVA, > > * however we can be told to quit via userfault_quit_fd which is > > * an eventfd > > */ > > - pfd[0].fd = mis->userfault_fd; > > - pfd[0].events = POLLIN; > > - pfd[0].revents = 0; > > - pfd[1].fd = mis->userfault_event_fd; > > - pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ > > - pfd[1].revents = 0; > > - > > - if (poll(pfd, 2, -1 /* Wait forever */) == -1) { > > + > > + poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); > > + if (poll_result == -1) { > > error_report("%s: userfault poll: %s", __func__, strerror(errno)); > > break; > > } > > @@ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque) > > } > > } > > > > - ret = read(mis->userfault_fd, &msg, sizeof(msg)); > > - if (ret != sizeof(msg)) { > > - if (errno == EAGAIN) { > > - /* > > - * if a wake up happens on the other thread just after > > - * the poll, there is nothing to read. > > - */ > > - continue; > > + if (pfd[0].revents) { > > + poll_result--; > > + ret = read(mis->userfault_fd, &msg, sizeof(msg)); > > + if (ret != sizeof(msg)) { > > + if (errno == EAGAIN) { > > + /* > > + * if a wake up happens on the other thread just after > > + * the poll, there is nothing to read. > > + */ > > + continue; > > + } > > + if (ret < 0) { > > + error_report("%s: Failed to read full userfault " > > + "message: %s", > > + __func__, strerror(errno)); > > + break; > > + } else { > > + error_report("%s: Read %d bytes from userfaultfd " > > + "expected %zd", > > + __func__, ret, sizeof(msg)); > > + break; /* Lost alignment, don't know what we'd read next */ > > + } > > } > > - if (ret < 0) { > > - error_report("%s: Failed to read full userfault message: %s", > > - __func__, strerror(errno)); > > - break; > > - } else { > > - error_report("%s: Read %d bytes from userfaultfd expected %zd", > > - __func__, ret, sizeof(msg)); > > - break; /* Lost alignment, don't know what we'd read next */ > > + if (msg.event != UFFD_EVENT_PAGEFAULT) { > > + error_report("%s: Read unexpected event %ud from userfaultfd", > > + __func__, msg.event); > > + continue; /* It's not a page fault, shouldn't happen */ > > } > > - } > > - if (msg.event != UFFD_EVENT_PAGEFAULT) { > > - error_report("%s: Read unexpected event %ud from userfaultfd", > > - __func__, msg.event); > > - continue; /* It's not a page fault, shouldn't happen */ > > - } > > > > - rb = qemu_ram_block_from_host( > > - (void *)(uintptr_t)msg.arg.pagefault.address, > > - true, &rb_offset); > > - if (!rb) { > > - error_report("postcopy_ram_fault_thread: Fault outside guest: %" > > - PRIx64, (uint64_t)msg.arg.pagefault.address); > > - break; > > - } > > + rb = qemu_ram_block_from_host( > > + (void *)(uintptr_t)msg.arg.pagefault.address, > > + true, &rb_offset); > > + if (!rb) { > > + error_report("postcopy_ram_fault_thread: Fault outside guest: %" > > + PRIx64, (uint64_t)msg.arg.pagefault.address); > > + break; > > + } > > > > - rb_offset &= ~(qemu_ram_pagesize(rb) - 1); > > - trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, > > + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); > > + trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, > > qemu_ram_get_idstr(rb), > > rb_offset); > > + /* > > + * Send the request to the source - we want to request one > > + * of our host page sizes (which is >= TPS) > > + */ > > + if (rb != last_rb) { > > + last_rb = rb; > > + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), > > + rb_offset, qemu_ram_pagesize(rb)); > > + } else { > > + /* Save some space */ > > + migrate_send_rp_req_pages(mis, NULL, > > + rb_offset, qemu_ram_pagesize(rb)); > > + } > > + } > > > > - /* > > - * Send the request to the source - we want to request one > > - * of our host page sizes (which is >= TPS) > > - */ > > - if (rb != last_rb) { > > - last_rb = rb; > > - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), > > - rb_offset, qemu_ram_pagesize(rb)); > > - } else { > > - /* Save some space */ > > - migrate_send_rp_req_pages(mis, NULL, > > - rb_offset, qemu_ram_pagesize(rb)); > > + /* Now handle any requests from external processes on shared memory */ > > + /* TODO: May need to handle devices deregistering during postcopy */ > > + for (index = 2; index < pfd_len && poll_result; index++) { > > + if (pfd[index].revents) { > > + struct PostCopyFD *pcfd = > > + &g_array_index(mis->postcopy_remote_fds, > > + struct PostCopyFD, index - 2); > > + > > + poll_result--; > > + if (pfd[index].revents & POLLERR) { > > + error_report("%s: POLLERR on poll %zd fd=%d", > > + __func__, index, pcfd->fd); > > + pfd[index].events = 0; > > + continue; > > + } > > + > > + ret = read(pcfd->fd, &msg, sizeof(msg)); > > + if (ret != sizeof(msg)) { > > + if (errno == EAGAIN) { > > + /* > > + * if a wake up happens on the other thread just after > > + * the poll, there is nothing to read. > > + */ > > + continue; > > + } > > + if (ret < 0) { > > + error_report("%s: Failed to read full userfault " > > + "message: %s (shared) revents=%d", > > + __func__, strerror(errno), > > + pfd[index].revents); > > + /*TODO: Could just disable this sharer */ > > + break; > > + } else { > > + error_report("%s: Read %d bytes from userfaultfd " > > + "expected %zd (shared)", > > + __func__, ret, sizeof(msg)); > > + /*TODO: Could just disable this sharer */ > > + break; /*Lost alignment,don't know what we'd read next*/ > > + } > > + } > > + if (msg.event != UFFD_EVENT_PAGEFAULT) { > > + error_report("%s: Read unexpected event %ud " > > + "from userfaultfd (shared)", > > + __func__, msg.event); > > + continue; /* It's not a page fault, shouldn't happen */ > > + } > > + /* Call the device handler registered with us */ > > + ret = pcfd->handler(pcfd, &msg); > > + if (ret) { > > + error_report("%s: Failed to resolve shared fault on %zd/%s", > > + __func__, index, pcfd->idstr); > > + /* TODO: Fail? Disable this sharer? */ > > + } > > + } > > } > > } > > trace_postcopy_ram_fault_thread_exit(); > > @@ -970,3 +1045,32 @@ PostcopyState postcopy_state_set(PostcopyState new_state) > > { > > return atomic_xchg(&incoming_postcopy_state, new_state); > > } > > + > > +/* Register a handler for external shared memory postcopy > > + * called on the destination. > > + */ > > +void postcopy_register_shared_ufd(struct PostCopyFD *pcfd) > > +{ > > + MigrationIncomingState *mis = migration_incoming_get_current(); > > + > > + mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds, > > + *pcfd); > > +} > > + > > +/* Unregister a handler for external shared memory postcopy > > + */ > > +void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) > > +{ > > + guint i; > > + MigrationIncomingState *mis = migration_incoming_get_current(); > > + GArray *pcrfds = mis->postcopy_remote_fds; > > + > > + for (i = 0; i < pcrfds->len; i++) { > > + struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i); > > + if (cur->fd == pcfd->fd) { > > + mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i); > > + return; > > + } > > + } > > +} > > + > > diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h > > index 0421c98d57..f21eef6702 100644 > > --- a/migration/postcopy-ram.h > > +++ b/migration/postcopy-ram.h > > @@ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n); > > /* Call the notifier list set by postcopy_add_start_notifier */ > > int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp); > > > > +struct PostCopyFD; > > + > > +/* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */ > > +typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd); > > + > > +struct PostCopyFD { > > + int fd; > > + /* Data to pass to handler */ > > + void *data; > > + /* Handler to be called whenever we get a poll event */ > > + pcfdhandler handler; > > + /* A string to use in error messages */ > > + const char *idstr; > > +}; > > + > > +/* Register a userfaultfd owned by an external process for > > + * shared memory. > > + */ > > +void postcopy_register_shared_ufd(struct PostCopyFD *pcfd); > > +void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd); > > + > > #endif > > diff --git a/migration/trace-events b/migration/trace-events > > index 93961dea16..1e617ad7a6 100644 > > --- a/migration/trace-events > > +++ b/migration/trace-events > > @@ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p" > > postcopy_ram_enable_notify(void) "" > > postcopy_ram_fault_thread_entry(void) "" > > postcopy_ram_fault_thread_exit(void) "" > > +postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d" > > +postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d" > > postcopy_ram_fault_thread_quit(void) "" > > postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx" > > postcopy_ram_incoming_cleanup_closeuf(void) "" > > -- > > 2.14.3 -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/migration/migration.c b/migration/migration.c index 6a4780ef6f..1f22f463d3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void) if (!once) { mis_current.state = MIGRATION_STATUS_NONE; memset(&mis_current, 0, sizeof(MigrationIncomingState)); + mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE, + sizeof(struct PostCopyFD)); qemu_mutex_init(&mis_current.rp_mutex); qemu_event_init(&mis_current.main_thread_load_event, false); once = true; @@ -177,6 +179,10 @@ void migration_incoming_state_destroy(void) qemu_fclose(mis->from_src_file); mis->from_src_file = NULL; } + if (mis->postcopy_remote_fds) { + g_array_free(mis->postcopy_remote_fds, TRUE); + mis->postcopy_remote_fds = NULL; + } qemu_event_reset(&mis->main_thread_load_event); } diff --git a/migration/migration.h b/migration/migration.h index 08c5d2ded1..d02a759331 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -51,6 +51,8 @@ struct MigrationIncomingState { QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; void *postcopy_tmp_zero_page; + /* PostCopyFD's for external userfaultfds & handlers of shared memory */ + GArray *postcopy_remote_fds; QEMUBH *bh; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 1089814d54..4ab1b7d36d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque) MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; + size_t index; RAMBlock *rb = NULL; RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ trace_postcopy_ram_fault_thread_entry(); qemu_sem_post(&mis->fault_thread_sem); + struct pollfd *pfd; + size_t pfd_len = 2 + mis->postcopy_remote_fds->len; + + pfd = g_new0(struct pollfd, pfd_len); + + pfd[0].fd = mis->userfault_fd; + pfd[0].events = POLLIN; + pfd[1].fd = mis->userfault_event_fd; + pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ + trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd); + for (index = 0; index < mis->postcopy_remote_fds->len; index++) { + struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, + struct PostCopyFD, index); + pfd[2 + index].fd = pcfd->fd; + pfd[2 + index].events = POLLIN; + trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr, + pcfd->fd); + } + while (true) { ram_addr_t rb_offset; - struct pollfd pfd[2]; + int poll_result; /* * We're mainly waiting for the kernel to give us a faulting HVA, * however we can be told to quit via userfault_quit_fd which is * an eventfd */ - pfd[0].fd = mis->userfault_fd; - pfd[0].events = POLLIN; - pfd[0].revents = 0; - pfd[1].fd = mis->userfault_event_fd; - pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ - pfd[1].revents = 0; - - if (poll(pfd, 2, -1 /* Wait forever */) == -1) { + + poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); + if (poll_result == -1) { error_report("%s: userfault poll: %s", __func__, strerror(errno)); break; } @@ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque) } } - ret = read(mis->userfault_fd, &msg, sizeof(msg)); - if (ret != sizeof(msg)) { - if (errno == EAGAIN) { - /* - * if a wake up happens on the other thread just after - * the poll, there is nothing to read. - */ - continue; + if (pfd[0].revents) { + poll_result--; + ret = read(mis->userfault_fd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (errno == EAGAIN) { + /* + * if a wake up happens on the other thread just after + * the poll, there is nothing to read. + */ + continue; + } + if (ret < 0) { + error_report("%s: Failed to read full userfault " + "message: %s", + __func__, strerror(errno)); + break; + } else { + error_report("%s: Read %d bytes from userfaultfd " + "expected %zd", + __func__, ret, sizeof(msg)); + break; /* Lost alignment, don't know what we'd read next */ + } } - if (ret < 0) { - error_report("%s: Failed to read full userfault message: %s", - __func__, strerror(errno)); - break; - } else { - error_report("%s: Read %d bytes from userfaultfd expected %zd", - __func__, ret, sizeof(msg)); - break; /* Lost alignment, don't know what we'd read next */ + if (msg.event != UFFD_EVENT_PAGEFAULT) { + error_report("%s: Read unexpected event %ud from userfaultfd", + __func__, msg.event); + continue; /* It's not a page fault, shouldn't happen */ } - } - if (msg.event != UFFD_EVENT_PAGEFAULT) { - error_report("%s: Read unexpected event %ud from userfaultfd", - __func__, msg.event); - continue; /* It's not a page fault, shouldn't happen */ - } - rb = qemu_ram_block_from_host( - (void *)(uintptr_t)msg.arg.pagefault.address, - true, &rb_offset); - if (!rb) { - error_report("postcopy_ram_fault_thread: Fault outside guest: %" - PRIx64, (uint64_t)msg.arg.pagefault.address); - break; - } + rb = qemu_ram_block_from_host( + (void *)(uintptr_t)msg.arg.pagefault.address, + true, &rb_offset); + if (!rb) { + error_report("postcopy_ram_fault_thread: Fault outside guest: %" + PRIx64, (uint64_t)msg.arg.pagefault.address); + break; + } - rb_offset &= ~(qemu_ram_pagesize(rb) - 1); - trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); + trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset); + /* + * Send the request to the source - we want to request one + * of our host page sizes (which is >= TPS) + */ + if (rb != last_rb) { + last_rb = rb; + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), + rb_offset, qemu_ram_pagesize(rb)); + } else { + /* Save some space */ + migrate_send_rp_req_pages(mis, NULL, + rb_offset, qemu_ram_pagesize(rb)); + } + } - /* - * Send the request to the source - we want to request one - * of our host page sizes (which is >= TPS) - */ - if (rb != last_rb) { - last_rb = rb; - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, qemu_ram_pagesize(rb)); - } else { - /* Save some space */ - migrate_send_rp_req_pages(mis, NULL, - rb_offset, qemu_ram_pagesize(rb)); + /* Now handle any requests from external processes on shared memory */ + /* TODO: May need to handle devices deregistering during postcopy */ + for (index = 2; index < pfd_len && poll_result; index++) { + if (pfd[index].revents) { + struct PostCopyFD *pcfd = + &g_array_index(mis->postcopy_remote_fds, + struct PostCopyFD, index - 2); + + poll_result--; + if (pfd[index].revents & POLLERR) { + error_report("%s: POLLERR on poll %zd fd=%d", + __func__, index, pcfd->fd); + pfd[index].events = 0; + continue; + } + + ret = read(pcfd->fd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (errno == EAGAIN) { + /* + * if a wake up happens on the other thread just after + * the poll, there is nothing to read. + */ + continue; + } + if (ret < 0) { + error_report("%s: Failed to read full userfault " + "message: %s (shared) revents=%d", + __func__, strerror(errno), + pfd[index].revents); + /*TODO: Could just disable this sharer */ + break; + } else { + error_report("%s: Read %d bytes from userfaultfd " + "expected %zd (shared)", + __func__, ret, sizeof(msg)); + /*TODO: Could just disable this sharer */ + break; /*Lost alignment,don't know what we'd read next*/ + } + } + if (msg.event != UFFD_EVENT_PAGEFAULT) { + error_report("%s: Read unexpected event %ud " + "from userfaultfd (shared)", + __func__, msg.event); + continue; /* It's not a page fault, shouldn't happen */ + } + /* Call the device handler registered with us */ + ret = pcfd->handler(pcfd, &msg); + if (ret) { + error_report("%s: Failed to resolve shared fault on %zd/%s", + __func__, index, pcfd->idstr); + /* TODO: Fail? Disable this sharer? */ + } + } } } trace_postcopy_ram_fault_thread_exit(); @@ -970,3 +1045,32 @@ PostcopyState postcopy_state_set(PostcopyState new_state) { return atomic_xchg(&incoming_postcopy_state, new_state); } + +/* Register a handler for external shared memory postcopy + * called on the destination. + */ +void postcopy_register_shared_ufd(struct PostCopyFD *pcfd) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + + mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds, + *pcfd); +} + +/* Unregister a handler for external shared memory postcopy + */ +void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) +{ + guint i; + MigrationIncomingState *mis = migration_incoming_get_current(); + GArray *pcrfds = mis->postcopy_remote_fds; + + for (i = 0; i < pcrfds->len; i++) { + struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i); + if (cur->fd == pcfd->fd) { + mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i); + return; + } + } +} + diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 0421c98d57..f21eef6702 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n); /* Call the notifier list set by postcopy_add_start_notifier */ int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp); +struct PostCopyFD; + +/* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */ +typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd); + +struct PostCopyFD { + int fd; + /* Data to pass to handler */ + void *data; + /* Handler to be called whenever we get a poll event */ + pcfdhandler handler; + /* A string to use in error messages */ + const char *idstr; +}; + +/* Register a userfaultfd owned by an external process for + * shared memory. + */ +void postcopy_register_shared_ufd(struct PostCopyFD *pcfd); +void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd); + #endif diff --git a/migration/trace-events b/migration/trace-events index 93961dea16..1e617ad7a6 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p" postcopy_ram_enable_notify(void) "" postcopy_ram_fault_thread_entry(void) "" postcopy_ram_fault_thread_exit(void) "" +postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d" +postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d" postcopy_ram_fault_thread_quit(void) "" postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx" postcopy_ram_incoming_cleanup_closeuf(void) ""