diff mbox series

[v3,02/22] KVM: selftests: Use EPOLL in userfaultfd_util reader threads and signal errors via TEST_ASSERT

Message ID 20230412213510.1220557-3-amoorthy@google.com (mailing list archive)
State New, archived
Headers show
Series Improve scalability of KVM + userfaultfd live migration via annotated memory faults. | expand

Commit Message

Anish Moorthy April 12, 2023, 9:34 p.m. UTC
With multiple reader threads POLLing a single UFFD, the test suffers
from the thundering herd problem: performance degrades as the number of
reader threads is increased. Solve this issue [1] by switching the
polling mechanism to EPOLL + EPOLLEXCLUSIVE.

Also, change the error-handling convention of uffd_handler_thread_fn.
Instead of just printing errors and returning early from the polling
loop, check for them via TEST_ASSERT. "return NULL" is reserved for a
successful exit from uffd_handler_thread_fn, i.e. one triggered by a
write to the exit pipe.

Performance samples generated by the command in [2] are given below.

Num Reader Threads, Paging Rate (POLL), Paging Rate (EPOLL)
1      249k      185k
2      201k      235k
4      186k      155k
16     150k      217k
32     89k       198k

[1] Single-vCPU performance does suffer somewhat.
[2] ./demand_paging_test -u MINOR -s shmem -v 4 -o -r <num readers>

Signed-off-by: Anish Moorthy <amoorthy@google.com>
Acked-by: James Houghton <jthoughton@google.com>
---
 .../selftests/kvm/demand_paging_test.c        |  1 -
 .../selftests/kvm/lib/userfaultfd_util.c      | 74 +++++++++----------
 2 files changed, 35 insertions(+), 40 deletions(-)

Comments

Robert Hoo April 19, 2023, 1:36 p.m. UTC | #1
On 4/13/2023 5:34 AM, Anish Moorthy wrote:
> With multiple reader threads POLLing a single UFFD, the test suffers
> from the thundering herd problem: performance degrades as the number of
> reader threads is increased. Solve this issue [1] by switching the
> polling mechanism to EPOLL + EPOLLEXCLUSIVE.
> 
> Also, change the error-handling convention of uffd_handler_thread_fn.
> Instead of just printing errors and returning early from the polling
> loop, check for them via TEST_ASSERT. "return NULL" is reserved for a
> successful exit from uffd_handler_thread_fn, ie one triggered by a
> write to the exit pipe.
> 
> Performance samples generated by the command in [2] are given below.
> 
> Num Reader Threads, Paging Rate (POLL), Paging Rate (EPOLL)
> 1      249k      185k
> 2      201k      235k
> 4      186k      155k
> 16     150k      217k
> 32     89k       198k
> 
> [1] Single-vCPU performance does suffer somewhat.
> [2] ./demand_paging_test -u MINOR -s shmem -v 4 -o -r <num readers>
> 
> Signed-off-by: Anish Moorthy <amoorthy@google.com>
> Acked-by: James Houghton <jthoughton@google.com>
> ---
>   .../selftests/kvm/demand_paging_test.c        |  1 -
>   .../selftests/kvm/lib/userfaultfd_util.c      | 74 +++++++++----------
>   2 files changed, 35 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
> index 6c2253f4a64ef..c729cee4c2055 100644
> --- a/tools/testing/selftests/kvm/demand_paging_test.c
> +++ b/tools/testing/selftests/kvm/demand_paging_test.c
> @@ -13,7 +13,6 @@
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <time.h>
> -#include <poll.h>
>   #include <pthread.h>
>   #include <linux/userfaultfd.h>
>   #include <sys/syscall.h>
> diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
> index 2723ee1e3e1b2..909ad69c1cb04 100644
> --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
> +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
> @@ -16,6 +16,7 @@
>   #include <poll.h>
>   #include <pthread.h>
>   #include <linux/userfaultfd.h>
> +#include <sys/epoll.h>
>   #include <sys/syscall.h>
>   
>   #include "kvm_util.h"
> @@ -32,60 +33,55 @@ static void *uffd_handler_thread_fn(void *arg)
>   	int64_t pages = 0;
>   	struct timespec start;
>   	struct timespec ts_diff;
> +	int epollfd;
> +	struct epoll_event evt;
> +
> +	epollfd = epoll_create(1);
> +	TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
> +
> +	evt.events = EPOLLIN | EPOLLEXCLUSIVE;
> +	evt.data.u32 = 0;
> +	TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt) == 0,
> +				"Failed to add uffd to epollfd");
> +
> +	evt.events = EPOLLIN;
> +	evt.data.u32 = 1;
> +	TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt) == 0,
> +				"Failed to add pipe to epollfd");
>   
>   	clock_gettime(CLOCK_MONOTONIC, &start);
>   	while (1) {
>   		struct uffd_msg msg;
> -		struct pollfd pollfd[2];
> -		char tmp_chr;
>   		int r;
>   
> -		pollfd[0].fd = uffd;
> -		pollfd[0].events = POLLIN;
> -		pollfd[1].fd = reader_args->pipe;
> -		pollfd[1].events = POLLIN;
> -
> -		r = poll(pollfd, 2, -1);
> -		switch (r) {
> -		case -1:
> -			pr_info("poll err");
> -			continue;
> -		case 0:
> -			continue;
> -		case 1:
> -			break;
> -		default:
> -			pr_info("Polling uffd returned %d", r);
> -			return NULL;
> -		}
> +		r = epoll_wait(epollfd, &evt, 1, -1);
> +		TEST_ASSERT(r == 1,
> +					"Unexpected number of events (%d) from epoll, errno = %d",
> +					r, errno);
>   
too much indentation, also seen elsewhere.

> -		if (pollfd[0].revents & POLLERR) {
> -			pr_info("uffd revents has POLLERR");
> -			return NULL;
> -		}
> +		if (evt.data.u32 == 1) {
> +			char tmp_chr;
>   
> -		if (pollfd[1].revents & POLLIN) {
> -			r = read(pollfd[1].fd, &tmp_chr, 1);
> +			TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
> +						"Reader thread received EPOLLERR or EPOLLHUP on pipe.");
> +			r = read(reader_args->pipe, &tmp_chr, 1);
>   			TEST_ASSERT(r == 1,
> -				    "Error reading pipefd in UFFD thread\n");
> +						"Error reading pipefd in uffd reader thread");
>   			return NULL;

How about a goto to
	ts_diff = timespec_elapsed(start);
instead? Otherwise the final stats never get a chance to be calculated.

>   		}
>   
> -		if (!(pollfd[0].revents & POLLIN))
> -			continue;
> +		TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
> +					"Reader thread received EPOLLERR or EPOLLHUP on uffd.");
>   
>   		r = read(uffd, &msg, sizeof(msg));
>   		if (r == -1) {
> -			if (errno == EAGAIN)
> -				continue;
> -			pr_info("Read of uffd got errno %d\n", errno);
> -			return NULL;
> +			TEST_ASSERT(errno == EAGAIN,
> +						"Error reading from UFFD: errno = %d", errno);
> +			continue;
>   		}
>   
> -		if (r != sizeof(msg)) {
> -			pr_info("Read on uffd returned unexpected size: %d bytes", r);
> -			return NULL;
> -		}
> +		TEST_ASSERT(r == sizeof(msg),
> +					"Read on uffd returned unexpected number of bytes (%d)", r);
>   
>   		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
>   			continue;
> @@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg)
>   		if (reader_args->delay)
>   			usleep(reader_args->delay);
>   		r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
> -		if (r < 0)
> -			return NULL;
> +		TEST_ASSERT(r >= 0,
> +					"Reader thread handler fn returned negative value %d", r);
>   		pages++;
>   	}
>
Anish Moorthy April 19, 2023, 11:26 p.m. UTC | #2
On Wed, Apr 19, 2023 at 6:36 AM Hoo Robert <robert.hoo.linux@gmail.com> wrote:
>
> How about goto
>         ts_diff = timespec_elapsed(start);
> Otherwise last stats won't get chances to be calc'ed.

Good idea, done.

> > +             TEST_ASSERT(r == 1,
> > +                                     "Unexpected number of events (%d) from epoll, errno = %d",
> > +                                     r, errno);
> >
> too much indentation, also seen elsewhere.

Augh, my editor has been set to a tab width of 4 this entire time.
That... explains a lot >:(

> >               }
> >
> > -             if (!(pollfd[0].revents & POLLIN))
> > -                     continue;
> > +             TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
> > +                                     "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
> >
> >               r = read(uffd, &msg, sizeof(msg));
> >               if (r == -1) {
> > -                     if (errno == EAGAIN)
> > -                             continue;
> > -                     pr_info("Read of uffd got errno %d\n", errno);
> > -                     return NULL;
> > +                     TEST_ASSERT(errno == EAGAIN,
> > +                                             "Error reading from UFFD: errno = %d", errno);
> > +                     continue;
> >               }
> >
> > -             if (r != sizeof(msg)) {
> > -                     pr_info("Read on uffd returned unexpected size: %d bytes", r);
> > -                     return NULL;
> > -             }
> > +             TEST_ASSERT(r == sizeof(msg),
> > +                                     "Read on uffd returned unexpected number of bytes (%d)", r);
> >
> >               if (!(msg.event & UFFD_EVENT_PAGEFAULT))
> >                       continue;
> > @@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg)
> >               if (reader_args->delay)
> >                       usleep(reader_args->delay);
> >               r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
> > -             if (r < 0)
> > -                     return NULL;
> > +             TEST_ASSERT(r >= 0,
> > +                                     "Reader thread handler fn returned negative value %d", r);
> >               pages++;
> >       }
> >
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 6c2253f4a64ef..c729cee4c2055 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -13,7 +13,6 @@ 
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
-#include <poll.h>
 #include <pthread.h>
 #include <linux/userfaultfd.h>
 #include <sys/syscall.h>
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 2723ee1e3e1b2..909ad69c1cb04 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -16,6 +16,7 @@ 
 #include <poll.h>
 #include <pthread.h>
 #include <linux/userfaultfd.h>
+#include <sys/epoll.h>
 #include <sys/syscall.h>
 
 #include "kvm_util.h"
@@ -32,60 +33,55 @@  static void *uffd_handler_thread_fn(void *arg)
 	int64_t pages = 0;
 	struct timespec start;
 	struct timespec ts_diff;
+	int epollfd;
+	struct epoll_event evt;
+
+	epollfd = epoll_create(1);
+	TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
+
+	evt.events = EPOLLIN | EPOLLEXCLUSIVE;
+	evt.data.u32 = 0;
+	TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt) == 0,
+				"Failed to add uffd to epollfd");
+
+	evt.events = EPOLLIN;
+	evt.data.u32 = 1;
+	TEST_ASSERT(epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt) == 0,
+				"Failed to add pipe to epollfd");
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	while (1) {
 		struct uffd_msg msg;
-		struct pollfd pollfd[2];
-		char tmp_chr;
 		int r;
 
-		pollfd[0].fd = uffd;
-		pollfd[0].events = POLLIN;
-		pollfd[1].fd = reader_args->pipe;
-		pollfd[1].events = POLLIN;
-
-		r = poll(pollfd, 2, -1);
-		switch (r) {
-		case -1:
-			pr_info("poll err");
-			continue;
-		case 0:
-			continue;
-		case 1:
-			break;
-		default:
-			pr_info("Polling uffd returned %d", r);
-			return NULL;
-		}
+		r = epoll_wait(epollfd, &evt, 1, -1);
+		TEST_ASSERT(r == 1,
+					"Unexpected number of events (%d) from epoll, errno = %d",
+					r, errno);
 
-		if (pollfd[0].revents & POLLERR) {
-			pr_info("uffd revents has POLLERR");
-			return NULL;
-		}
+		if (evt.data.u32 == 1) {
+			char tmp_chr;
 
-		if (pollfd[1].revents & POLLIN) {
-			r = read(pollfd[1].fd, &tmp_chr, 1);
+			TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+						"Reader thread received EPOLLERR or EPOLLHUP on pipe.");
+			r = read(reader_args->pipe, &tmp_chr, 1);
 			TEST_ASSERT(r == 1,
-				    "Error reading pipefd in UFFD thread\n");
+						"Error reading pipefd in uffd reader thread");
 			return NULL;
 		}
 
-		if (!(pollfd[0].revents & POLLIN))
-			continue;
+		TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+					"Reader thread received EPOLLERR or EPOLLHUP on uffd.");
 
 		r = read(uffd, &msg, sizeof(msg));
 		if (r == -1) {
-			if (errno == EAGAIN)
-				continue;
-			pr_info("Read of uffd got errno %d\n", errno);
-			return NULL;
+			TEST_ASSERT(errno == EAGAIN,
+						"Error reading from UFFD: errno = %d", errno);
+			continue;
 		}
 
-		if (r != sizeof(msg)) {
-			pr_info("Read on uffd returned unexpected size: %d bytes", r);
-			return NULL;
-		}
+		TEST_ASSERT(r == sizeof(msg),
+					"Read on uffd returned unexpected number of bytes (%d)", r);
 
 		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
 			continue;
@@ -93,8 +89,8 @@  static void *uffd_handler_thread_fn(void *arg)
 		if (reader_args->delay)
 			usleep(reader_args->delay);
 		r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
-		if (r < 0)
-			return NULL;
+		TEST_ASSERT(r >= 0,
+					"Reader thread handler fn returned negative value %d", r);
 		pages++;
 	}