@@ -3774,6 +3774,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
+ .uring_cmd = btrfs_uring_cmd,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};
@@ -29,6 +29,7 @@
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#include <linux/sched/xacct.h>
+#include <linux/io_uring/cmd.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -4723,6 +4724,288 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
return ret;
}
+struct btrfs_uring_priv {
+ struct io_uring_cmd *cmd;
+ struct page **pages;
+ unsigned long nr_pages;
+ struct kiocb iocb;
+ struct iovec *iov;
+ struct iov_iter iter;
+ struct extent_state *cached_state;
+ u64 count;
+ bool compressed;
+ u64 start;
+ u64 lockend;
+};
+
+static void btrfs_uring_read_finished(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ struct btrfs_uring_priv *priv = (struct btrfs_uring_priv *)*(uintptr_t*)cmd->pdu;
+ struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ unsigned long i;
+ u64 cur;
+ size_t page_offset;
+ ssize_t ret;
+
+ if (priv->compressed) {
+ i = 0;
+ page_offset = 0;
+ } else {
+ i = (priv->iocb.ki_pos - priv->start) >> PAGE_SHIFT;
+ page_offset = (priv->iocb.ki_pos - priv->start) & (PAGE_SIZE - 1);
+ }
+ cur = 0;
+ while (cur < priv->count) {
+ size_t bytes = min_t(size_t, priv->count - cur,
+ PAGE_SIZE - page_offset);
+
+ if (copy_page_to_iter(priv->pages[i], page_offset, bytes,
+ &priv->iter) != bytes) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ i++;
+ cur += bytes;
+ page_offset = 0;
+ }
+ ret = priv->count;
+
+out:
+ unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
+ add_rchar(current, ret);
+
+ for (unsigned long i = 0; i < priv->nr_pages; i++) {
+ __free_page(priv->pages[i]);
+ }
+
+ kfree(priv->pages);
+ kfree(priv->iov);
+ kfree(priv);
+}
+
+static void btrfs_uring_read_extent_cb(void *ctx, int err)
+{
+ struct btrfs_uring_priv *priv = ctx;
+
+ *(uintptr_t*)priv->cmd->pdu = (uintptr_t)priv;
+ io_uring_cmd_complete_in_task(priv->cmd, btrfs_uring_read_finished);
+}
+
+static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
+ u64 start, u64 lockend,
+ struct extent_state *cached_state,
+ u64 disk_bytenr, u64 disk_io_size,
+ size_t count, bool compressed,
+ struct iovec *iov,
+ struct io_uring_cmd *cmd)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct page **pages;
+ struct btrfs_uring_priv *priv = NULL;
+ unsigned long nr_pages;
+ int ret;
+
+ nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
+ pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+ ret = btrfs_alloc_page_array(nr_pages, pages, 0);
+ if (ret) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ priv = kmalloc(sizeof(*priv), GFP_NOFS);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ priv->iocb = *iocb;
+ priv->iov = iov;
+ priv->iter = *iter;
+ priv->count = count;
+ priv->cmd = cmd;
+ priv->cached_state = cached_state;
+ priv->compressed = compressed;
+ priv->nr_pages = nr_pages;
+ priv->pages = pages;
+ priv->start = start;
+ priv->lockend = lockend;
+
+ ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
+ disk_io_size, pages,
+ btrfs_uring_read_extent_cb,
+ priv);
+ if (ret)
+ goto fail;
+
+ return -EIOCBQUEUED;
+
+fail:
+ unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ kfree(priv);
+ return ret;
+}
+
+static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
+ flags);
+ size_t copy_end;
+ struct btrfs_ioctl_encoded_io_args args = {0};
+ int ret;
+ u64 disk_bytenr, disk_io_size;
+ struct file *file = cmd->file;
+ struct btrfs_inode *inode = BTRFS_I(file->f_inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ loff_t pos;
+ struct kiocb kiocb;
+ struct extent_state *cached_state = NULL;
+ u64 start, lockend;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_acct;
+ }
+
+ if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
+
+ copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
+ flags);
+ if (copy_from_user(&args32, (const void *)cmd->sqe->addr,
+ copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ args.iov = compat_ptr(args32.iov);
+ args.iovcnt = args32.iovcnt;
+ args.offset = args32.offset;
+ args.flags = args32.flags;
+#else
+ return -ENOTTY;
+#endif
+ } else {
+ copy_end = copy_end_kernel;
+ if (copy_from_user(&args, (const void *)cmd->sqe->addr,
+ copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+
+ if (args.flags != 0)
+ return -EINVAL;
+
+ ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+ &iov, &iter);
+ if (ret < 0)
+ goto out_acct;
+
+ if (iov_iter_count(&iter) == 0) {
+ ret = 0;
+ goto out_free;
+ }
+
+ pos = args.offset;
+ ret = rw_verify_area(READ, file, &pos, args.len);
+ if (ret < 0)
+ goto out_free;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = pos;
+
+ start = ALIGN_DOWN(pos, fs_info->sectorsize);
+ lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
+
+ ret = btrfs_encoded_read(&kiocb, &iter, &args,
+ issue_flags & IO_URING_F_NONBLOCK,
+ &cached_state, &disk_bytenr, &disk_io_size);
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ goto out_free;
+
+ file_accessed(file);
+
+ if (copy_to_user((void*)(uintptr_t)cmd->sqe->addr + copy_end,
+ (char *)&args + copy_end_kernel,
+ sizeof(args) - copy_end_kernel)) {
+ if (ret == -EIOCBQUEUED) {
+ unlock_extent(io_tree, start, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ }
+ ret = -EFAULT;
+ goto out_free;
+ }
+
+ if (ret == -EIOCBQUEUED) {
+ u64 count;
+
+ /* If we've optimized things by storing the iovecs on the stack,
+ * undo this. */
+ if (!iov) {
+ iov = kmalloc(sizeof(struct iovec) * args.iovcnt,
+ GFP_NOFS);
+ if (!iov) {
+ unlock_extent(io_tree, start, lockend,
+ &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+ ret = -ENOMEM;
+ goto out_acct;
+ }
+
+ memcpy(iov, iovstack,
+ sizeof(struct iovec) * args.iovcnt);
+ }
+
+ count = min_t(u64, iov_iter_count(&iter), disk_io_size);
+
+ ret = btrfs_uring_read_extent(&kiocb, &iter, start, lockend,
+ cached_state, disk_bytenr,
+ disk_io_size, count,
+ args.compression, iov, cmd);
+
+ goto out_acct;
+ }
+
+out_free:
+ kfree(iov);
+
+out_acct:
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+
+ return ret;
+}
+
+int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ switch (cmd->cmd_op) {
+ case BTRFS_IOC_ENCODED_READ:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ case BTRFS_IOC_ENCODED_READ_32:
+#endif
+ return btrfs_uring_encoded_read(cmd, issue_flags);
+ }
+
+ return -EINVAL;
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -22,5 +22,6 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(const u8 *uuid);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
+int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
#endif
Adds an io_uring command for encoded reads, using the same interface as the existing BTRFS_IOC_ENCODED_READ ioctl. Signed-off-by: Mark Harmstone <maharmstone@fb.com> --- fs/btrfs/file.c | 1 + fs/btrfs/ioctl.c | 283 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 1 + 3 files changed, 285 insertions(+)