diff mbox

[20/21] userfaultfd: UFFDIO_REMAP

Message ID 1425575884-2574-21-git-send-email-aarcange@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrea Arcangeli March 5, 2015, 5:18 p.m. UTC
This remap ioctl allows to atomically move a page in or out of an
userfaultfd address space. It's more expensive than "copy" (and of
course more expensive than "zerofill") as it requires a TLB flush on
the source range for each ioctl, which is an expensive operation on
SMP. Especially if copying only a few pages at time, copying without
TLB flush is faster.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 fs/userfaultfd.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 6230f22..b4c7f25 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -892,6 +892,54 @@  out:
 	return ret;
 }
 
+static int userfaultfd_remap(struct userfaultfd_ctx *ctx,
+			     unsigned long arg)
+{
+	__s64 ret;
+	struct uffdio_remap uffdio_remap;
+	struct uffdio_remap __user *user_uffdio_remap;
+	struct userfaultfd_wake_range range;
+
+	user_uffdio_remap = (struct uffdio_remap __user *) arg;
+
+	ret = -EFAULT;
+	if (copy_from_user(&uffdio_remap, user_uffdio_remap,
+			   /* don't copy "remap" and "wake" last field */
+			   sizeof(uffdio_remap)-sizeof(__s64)*2))
+		goto out;
+
+	ret = validate_range(ctx->mm, uffdio_remap.dst, uffdio_remap.len);
+	if (ret)
+		goto out;
+	ret = validate_range(current->mm, uffdio_remap.src, uffdio_remap.len);
+	if (ret)
+		goto out;
+	ret = -EINVAL;
+	if (uffdio_remap.mode & ~(UFFDIO_REMAP_MODE_ALLOW_SRC_HOLES|
+				  UFFDIO_REMAP_MODE_DONTWAKE))
+		goto out;
+
+	ret = remap_pages(ctx->mm, current->mm,
+			  uffdio_remap.dst, uffdio_remap.src,
+			  uffdio_remap.len, uffdio_remap.mode);
+	if (unlikely(put_user(ret, &user_uffdio_remap->remap)))
+		return -EFAULT;
+	if (ret < 0)
+		goto out;
+	/* len == 0 would wake all */
+	BUG_ON(!ret);
+	range.len = ret;
+	if (!(uffdio_remap.mode & UFFDIO_REMAP_MODE_DONTWAKE)) {
+		range.start = uffdio_remap.dst;
+		ret = wake_userfault(ctx, &range);
+		if (unlikely(put_user(ret, &user_uffdio_remap->wake)))
+			return -EFAULT;
+	}
+	ret = range.len == uffdio_remap.len ? 0 : -EAGAIN;
+out:
+	return ret;
+}
+
 /*
  * userland asks for a certain API version and we return which bits
  * and ioctl commands are implemented in this kernel for such API
@@ -955,6 +1003,9 @@  static long userfaultfd_ioctl(struct file *file, unsigned cmd,
 	case UFFDIO_ZEROPAGE:
 		ret = userfaultfd_zeropage(ctx, arg);
 		break;
+	case UFFDIO_REMAP:
+		ret = userfaultfd_remap(ctx, arg);
+		break;
 	}
 	return ret;
 }