diff mbox series

[RFC,05/13] fs/userfaultfd: introduce UFFD_FEATURE_POLL

Message ID 20201129004548.1619714-6-namit@vmware.com (mailing list archive)
State New, archived
Headers show
Series fs/userfaultfd: support iouring and polling | expand

Commit Message

Nadav Amit Nov. 29, 2020, 12:45 a.m. UTC
From: Nadav Amit <namit@vmware.com>

Add a feature UFFD_FEATURE_POLL that makes the faulting thread spin
while waiting for the page-fault to be handled.

Users of this feature should be wise by setting the page-fault handling
thread on another physical CPU and to potentially ensure that there are
available cores to run the handler, as otherwise they will see
performance degradation.

We can later enhance it by setting one or two timeouts: one timeout
until the page-fault is handled and another until the handler was
woken.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: io-uring@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Signed-off-by: Nadav Amit <namit@vmware.com>
---
 fs/userfaultfd.c                 | 24 ++++++++++++++++++++----
 include/uapi/linux/userfaultfd.h |  9 ++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index fedf7c1615d5..b6a04e526025 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -122,7 +122,9 @@  static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 	if (len && (start > uwq->msg.arg.pagefault.address ||
 		    start + len <= uwq->msg.arg.pagefault.address))
 		goto out;
-	WRITE_ONCE(uwq->waken, true);
+
+	smp_store_mb(uwq->waken, true);
+
 	/*
 	 * The Program-Order guarantees provided by the scheduler
 	 * ensure uwq->waken is visible before the task is woken.
@@ -377,6 +379,7 @@  vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	vm_fault_t ret = VM_FAULT_SIGBUS;
 	bool must_wait;
 	long blocking_state;
+	bool poll;
 
 	/*
 	 * We don't do userfault handling for the final child pid update.
@@ -410,6 +413,8 @@  vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	if (ctx->features & UFFD_FEATURE_SIGBUS)
 		goto out;
 
+	poll = ctx->features & UFFD_FEATURE_POLL;
+
 	/*
 	 * If it's already released don't get it. This avoids to loop
 	 * in __get_user_pages if userfaultfd_release waits on the
@@ -495,7 +500,10 @@  vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * following the spin_unlock to happen before the list_add in
 	 * __add_wait_queue.
 	 */
-	set_current_state(blocking_state);
+
+	if (!poll)
+		set_current_state(blocking_state);
+
 	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
 	if (!is_vm_hugetlb_page(vmf->vma))
@@ -509,10 +517,18 @@  vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 
 	if (likely(must_wait && !READ_ONCE(ctx->released))) {
 		wake_up_poll(&ctx->fd_wqh, EPOLLIN);
-		schedule();
+		if (poll) {
+			while (!READ_ONCE(uwq.waken) && !READ_ONCE(ctx->released) &&
+			       !signal_pending(current)) {
+				cpu_relax();
+				cond_resched();
+			}
+		} else
+			schedule();
 	}
 
-	__set_current_state(TASK_RUNNING);
+	if (!poll)
+		__set_current_state(TASK_RUNNING);
 
 	/*
 	 * Here we race with the list_del; list_add in
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index e7e98bde221f..4eeba4235afe 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -27,7 +27,9 @@ 
 			   UFFD_FEATURE_MISSING_HUGETLBFS |	\
 			   UFFD_FEATURE_MISSING_SHMEM |		\
 			   UFFD_FEATURE_SIGBUS |		\
-			   UFFD_FEATURE_THREAD_ID)
+			   UFFD_FEATURE_THREAD_ID |		\
+			   UFFD_FEATURE_POLL)
+
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -171,6 +173,10 @@  struct uffdio_api {
 	 *
 	 * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
 	 * be returned, if feature is not requested 0 will be returned.
+	 *
+	 * UFFD_FEATURE_POLL polls upon page-fault if the feature is requested
+	 * instead of descheduling. This feature should only be enabled for
+	 * low-latency handlers and when CPUs are not overcomitted.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -181,6 +187,7 @@  struct uffdio_api {
 #define UFFD_FEATURE_EVENT_UNMAP		(1<<6)
 #define UFFD_FEATURE_SIGBUS			(1<<7)
 #define UFFD_FEATURE_THREAD_ID			(1<<8)
+#define UFFD_FEATURE_POLL			(1<<9)
 	__u64 features;
 
 	__u64 ioctls;