@@ -892,6 +892,54 @@ out:
return ret;
}
+static int userfaultfd_remap(struct userfaultfd_ctx *ctx,
+ unsigned long arg)
+{
+ __s64 ret;
+ struct uffdio_remap uffdio_remap;
+ struct uffdio_remap __user *user_uffdio_remap;
+ struct userfaultfd_wake_range range;
+
+ user_uffdio_remap = (struct uffdio_remap __user *) arg;
+
+ ret = -EFAULT;
+ if (copy_from_user(&uffdio_remap, user_uffdio_remap,
+ /* don't copy "remap" and "wake" last field */
+ sizeof(uffdio_remap)-sizeof(__s64)*2))
+ goto out;
+
+ ret = validate_range(ctx->mm, uffdio_remap.dst, uffdio_remap.len);
+ if (ret)
+ goto out;
+ ret = validate_range(current->mm, uffdio_remap.src, uffdio_remap.len);
+ if (ret)
+ goto out;
+ ret = -EINVAL;
+ if (uffdio_remap.mode & ~(UFFDIO_REMAP_MODE_ALLOW_SRC_HOLES|
+ UFFDIO_REMAP_MODE_DONTWAKE))
+ goto out;
+
+ ret = remap_pages(ctx->mm, current->mm,
+ uffdio_remap.dst, uffdio_remap.src,
+ uffdio_remap.len, uffdio_remap.mode);
+ if (unlikely(put_user(ret, &user_uffdio_remap->remap)))
+ return -EFAULT;
+ if (ret < 0)
+ goto out;
+ /* len == 0 would wake all */
+ BUG_ON(!ret);
+ range.len = ret;
+ if (!(uffdio_remap.mode & UFFDIO_REMAP_MODE_DONTWAKE)) {
+ range.start = uffdio_remap.dst;
+ ret = wake_userfault(ctx, &range);
+ if (unlikely(put_user(ret, &user_uffdio_remap->wake)))
+ return -EFAULT;
+ }
+ ret = range.len == uffdio_remap.len ? 0 : -EAGAIN;
+out:
+ return ret;
+}
+
/*
* userland asks for a certain API version and we return which bits
* and ioctl commands are implemented in this kernel for such API
@@ -955,6 +1003,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
case UFFDIO_ZEROPAGE:
ret = userfaultfd_zeropage(ctx, arg);
break;
+ case UFFDIO_REMAP:
+ ret = userfaultfd_remap(ctx, arg);
+ break;
}
return ret;
}
This remap ioctl allows to atomically move a page in or out of an userfaultfd address space. It's more expensive than "copy" (and of course more expensive than "zerofill") as it requires a TLB flush on the source range for each ioctl, which is an expensive operation on SMP. Especially if copying only a few pages at time, copying without TLB flush is faster. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> --- fs/userfaultfd.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html