Message ID | 20230412213510.1220557-3-amoorthy@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Improve scalability of KVM + userfaultfd live migration via annotated memory faults. | expand |
On 4/13/2023 5:34 AM, Anish Moorthy wrote: > With multiple reader threads POLLing a single UFFD, the test suffers > from the thundering herd problem: performance degrades as the number of > reader threads is increased. Solve this issue [1] by switching the > polling mechanism to EPOLL + EPOLLEXCLUSIVE. > > Also, change the error-handling convention of uffd_handler_thread_fn. > Instead of just printing errors and returning early from the polling > loop, check for them via TEST_ASSERT. "return NULL" is reserved for a > successful exit from uffd_handler_thread_fn, i.e. one triggered by a > write to the exit pipe. > > Performance samples generated by the command in [2] are given below. > > Num Reader Threads, Paging Rate (POLL), Paging Rate (EPOLL) > 1 249k 185k > 2 201k 235k > 4 186k 155k > 16 150k 217k > 32 89k 198k > > [1] Single-vCPU performance does suffer somewhat. > [2] ./demand_paging_test -u MINOR -s shmem -v 4 -o -r <num readers> > > Signed-off-by: Anish Moorthy <amoorthy@google.com> > Acked-by: James Houghton <jthoughton@google.com> > --- > .../selftests/kvm/demand_paging_test.c | 1 - > .../selftests/kvm/lib/userfaultfd_util.c | 74 +++++++++---------- > 2 files changed, 35 insertions(+), 40 deletions(-) > > diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c > index 6c2253f4a64ef..c729cee4c2055 100644 > --- a/tools/testing/selftests/kvm/demand_paging_test.c > +++ b/tools/testing/selftests/kvm/demand_paging_test.c > @@ -13,7 +13,6 @@ > #include <stdio.h> > #include <stdlib.h> > #include <time.h> > -#include <poll.h> > #include <pthread.h> > #include <linux/userfaultfd.h> > #include <sys/syscall.h> > diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c > index 2723ee1e3e1b2..909ad69c1cb04 100644 > --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c > +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c > @@ -16,6 +16,7 @@ > 
#include <poll.h> > #include <pthread.h> > #include <linux/userfaultfd.h> > +#include <sys/epoll.h> > #include <sys/syscall.h> > > #include "kvm_util.h" > @@ -32,60 +33,55 @@ static void *uffd_handler_thread_fn(void *arg) > int64_t pages = 0; > struct timespec start; > struct timespec ts_diff; > + int epollfd; > + struct epoll_event evt; > + > + epollfd = epoll_create(1); > + TEST_ASSERT(epollfd >= 0, "Failed to create epollfd."); > + > + evt.events = EPOLLIN | EPOLLEXCLUSIVE; > + evt.data.u32 = 0; > + TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt) == 0, > + "Failed to add uffd to epollfd"); > + > + evt.events = EPOLLIN; > + evt.data.u32 = 1; > + TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt) == 0, > + "Failed to add pipe to epollfd"); > > clock_gettime(CLOCK_MONOTONIC, &start); > while (1) { > struct uffd_msg msg; > - struct pollfd pollfd[2]; > - char tmp_chr; > int r; > > - pollfd[0].fd = uffd; > - pollfd[0].events = POLLIN; > - pollfd[1].fd = reader_args->pipe; > - pollfd[1].events = POLLIN; > - > - r = poll(pollfd, 2, -1); > - switch (r) { > - case -1: > - pr_info("poll err"); > - continue; > - case 0: > - continue; > - case 1: > - break; > - default: > - pr_info("Polling uffd returned %d", r); > - return NULL; > - } > + r = epoll_wait(epollfd, &evt, 1, -1); > + TEST_ASSERT(r == 1, > + "Unexpected number of events (%d) from epoll, errno = %d", > + r, errno); > There is too much indentation here; the same problem also appears elsewhere in the patch. 
> - if (pollfd[0].revents & POLLERR) { > - pr_info("uffd revents has POLLERR"); > - return NULL; > - } > + if (evt.data.u32 == 1) { > + char tmp_chr; > > - if (pollfd[1].revents & POLLIN) { > - r = read(pollfd[1].fd, &tmp_chr, 1); > + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), > + "Reader thread received EPOLLERR or EPOLLHUP on pipe."); > + r = read(reader_args->pipe, &tmp_chr, 1); > TEST_ASSERT(r == 1, > - "Error reading pipefd in UFFD thread\n"); > + "Error reading pipefd in uffd reader thread"); > return NULL; How about a goto to the "ts_diff = timespec_elapsed(start);" line here instead? Otherwise the final stats won't get a chance to be calculated. > } > > - if (!(pollfd[0].revents & POLLIN)) > - continue; > + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), > + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); > > r = read(uffd, &msg, sizeof(msg)); > if (r == -1) { > - if (errno == EAGAIN) > - continue; > - pr_info("Read of uffd got errno %d\n", errno); > - return NULL; > + TEST_ASSERT(errno == EAGAIN, > + "Error reading from UFFD: errno = %d", errno); > + continue; > } > > - if (r != sizeof(msg)) { > - pr_info("Read on uffd returned unexpected size: %d bytes", r); > - return NULL; > - } > + TEST_ASSERT(r == sizeof(msg), > + "Read on uffd returned unexpected number of bytes (%d)", r); > > if (!(msg.event & UFFD_EVENT_PAGEFAULT)) > continue; > @@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg) > if (reader_args->delay) > usleep(reader_args->delay); > r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); > - if (r < 0) > - return NULL; > + TEST_ASSERT(r >= 0, > + "Reader thread handler fn returned negative value %d", r); > pages++; > } >
On Wed, Apr 19, 2023 at 6:36 AM Hoo Robert <robert.hoo.linux@gmail.com> wrote: > > How about goto > ts_diff = timespec_elapsed(start); > Otherwise last stats won't get chances to be calc'ed. Good idea, done. > > + TEST_ASSERT(r == 1, > > + "Unexpected number of events (%d) from epoll, errno = %d", > > + r, errno); > > > too much indentation, also seen elsewhere. Augh, my editor has been set to a tab width of 4 this entire time. That... explains a lot >:( > > } > > > > - if (!(pollfd[0].revents & POLLIN)) > > - continue; > > + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), > > + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); > > > > r = read(uffd, &msg, sizeof(msg)); > > if (r == -1) { > > - if (errno == EAGAIN) > > - continue; > > - pr_info("Read of uffd got errno %d\n", errno); > > - return NULL; > > + TEST_ASSERT(errno == EAGAIN, > > + "Error reading from UFFD: errno = %d", errno); > > + continue; > > } > > > > - if (r != sizeof(msg)) { > > - pr_info("Read on uffd returned unexpected size: %d bytes", r); > > - return NULL; > > - } > > + TEST_ASSERT(r == sizeof(msg), > > + "Read on uffd returned unexpected number of bytes (%d)", r); > > > > if (!(msg.event & UFFD_EVENT_PAGEFAULT)) > > continue; > > @@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg) > > if (reader_args->delay) > > usleep(reader_args->delay); > > r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); > > - if (r < 0) > > - return NULL; > > + TEST_ASSERT(r >= 0, > > + "Reader thread handler fn returned negative value %d", r); > > pages++; > > } > > >
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 6c2253f4a64ef..c729cee4c2055 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -13,7 +13,6 @@ #include <stdio.h> #include <stdlib.h> #include <time.h> -#include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 2723ee1e3e1b2..909ad69c1cb04 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -16,6 +16,7 @@ #include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> +#include <sys/epoll.h> #include <sys/syscall.h> #include "kvm_util.h" @@ -32,60 +33,55 @@ static void *uffd_handler_thread_fn(void *arg) int64_t pages = 0; struct timespec start; struct timespec ts_diff; + int epollfd; + struct epoll_event evt; + + epollfd = epoll_create(1); + TEST_ASSERT(epollfd >= 0, "Failed to create epollfd."); + + evt.events = EPOLLIN | EPOLLEXCLUSIVE; + evt.data.u32 = 0; + TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt) == 0, + "Failed to add uffd to epollfd"); + + evt.events = EPOLLIN; + evt.data.u32 = 1; + TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt) == 0, + "Failed to add pipe to epollfd"); clock_gettime(CLOCK_MONOTONIC, &start); while (1) { struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; int r; - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = reader_args->pipe; - pollfd[1].events = POLLIN; - - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } + r = epoll_wait(epollfd, &evt, 1, -1); + TEST_ASSERT(r == 1, + "Unexpected number of events (%d) 
from epoll, errno = %d", + r, errno); - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } + if (evt.data.u32 == 1) { + char tmp_chr; - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on pipe."); + r = read(reader_args->pipe, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); + "Error reading pipefd in uffd reader thread"); return NULL; } - if (!(pollfd[0].revents & POLLIN)) - continue; + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); r = read(uffd, &msg, sizeof(msg)); if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; + TEST_ASSERT(errno == EAGAIN, + "Error reading from UFFD: errno = %d", errno); + continue; } - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } + TEST_ASSERT(r == sizeof(msg), + "Read on uffd returned unexpected number of bytes (%d)", r); if (!(msg.event & UFFD_EVENT_PAGEFAULT)) continue; @@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg) if (reader_args->delay) usleep(reader_args->delay); r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); - if (r < 0) - return NULL; + TEST_ASSERT(r >= 0, + "Reader thread handler fn returned negative value %d", r); pages++; }