diff mbox series

[RFC,10/17] zuf: More file operation

Message ID 20190219115136.29952-11-boaz@plexistor.com (mailing list archive)
State New, archived
Headers show
Series zuf: ZUFS Zero-copy User-mode FileSystem | expand

Commit Message

Boaz Harrosh Feb. 19, 2019, 11:51 a.m. UTC
From: Boaz Harrosh <boazh@netapp.com>

Add more file operations.
Some of them call stubs in other files.

Signed-off-by: Boaz Harrosh <boazh@netapp.com>
---
 fs/zuf/_extern.h  |   4 +
 fs/zuf/file.c     | 429 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/zuf/rw.c       |  12 ++
 fs/zuf/zuf-core.c |   4 +
 fs/zuf/zus_api.h  |  45 +++++
 5 files changed, 494 insertions(+)
diff mbox series

Patch

diff --git a/fs/zuf/_extern.h b/fs/zuf/_extern.h
index 32a381ac4bd7..391484b0e125 100644
--- a/fs/zuf/_extern.h
+++ b/fs/zuf/_extern.h
@@ -48,6 +48,10 @@  uint zuf_prepare_symname(struct zufs_ioc_new_inode *ioc_new_inode,
 
 
 /* rw.c */
+ssize_t zuf_rw_read_iter(struct super_block *sb, struct inode *inode,
+			 struct kiocb *kiocb, struct iov_iter *ii);
+ssize_t zuf_rw_write_iter(struct super_block *sb, struct inode *inode,
+			  struct kiocb *kiocb, struct iov_iter *ii);
 int zuf_trim_edge(struct inode *inode, ulong filepos, uint len);
 
 /* super.c */
diff --git a/fs/zuf/file.c b/fs/zuf/file.c
index c6c8ca71e957..0e62145e923a 100644
--- a/fs/zuf/file.c
+++ b/fs/zuf/file.c
@@ -13,14 +13,443 @@ 
  *	Sagi Manole <sagim@netapp.com>"
  */
 
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/falloc.h>
+#include <linux/mman.h>
+#include <linux/fadvise.h>
+#include <linux/delay.h>
 #include "zuf.h"
 
+static long zuf_fallocate(struct file *file, int mode, loff_t offset,
+			   loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_range ioc_range = {
+		.hdr.in_len = sizeof(ioc_range),
+		.hdr.operation = ZUFS_OP_FALLOCATE,
+		.zus_ii = ZUII(inode)->zus_ii,
+		.offset = offset,
+		.length = len,
+		.opflags = mode,
+	};
+	enum {FALLOC_RETRY = 7};
+	int retry = 0;
+	int err = 0;
+
+	zuf_dbg_vfs("[%ld] mode=0x%x offset=0x%llx len=0x%llx\n",
+		     inode->i_ino, mode, offset, len);
+	/* Only regular files are accepted; anything else is -EINVAL */
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	zuf_w_lock(zii);
+	/* Range ends past i_size without KEEP_SIZE: vet the new size first */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	     (i_size_read(inode) < offset + len)) {
+		err = inode_newsize_ok(inode, offset + len);
+		if (unlikely(err))
+			goto out;
+	}
+
+	zus_inode_cmtime_now(inode, zii->zi);
+	/* Partial blocks at either edge are nullified via zuf_trim_edge() */
+	if (mode & (FALLOC_FL_ZERO_RANGE | FALLOC_FL_PUNCH_HOLE)) {
+		/* ASSUMING FS supports these two */
+		struct super_block *sb = inode->i_sb;
+		ulong off1 = offset & (sb->s_blocksize - 1);
+		ulong off2 = (offset + len) & (sb->s_blocksize - 1);
+
+		if (md_o2p(offset) == md_o2p(offset + len)) {
+			/* Same block. Just nullify the range and goto out */
+			err = zuf_trim_edge(inode, offset, off2 - off1);
+			goto out_update;
+		}
+		if (off1) {
+			uint l = sb->s_blocksize - off1;
+
+			err = zuf_trim_edge(inode, offset, l);
+			if (unlikely(err))
+				goto out;
+			if (mode & FALLOC_FL_ZERO_RANGE) {
+				ioc_range.offset += l;
+				ioc_range.length -= l;
+			}
+		}
+		if (off2) {
+			err = zuf_trim_edge(inode, (offset + len) - off2, off2);
+			if (unlikely(err))
+				goto out;
+			if (mode & FALLOC_FL_ZERO_RANGE)
+				ioc_range.length -= off2;
+		}
+	}
+
+	/* no length remains, but size might have changed in trim_edge */
+	if (!ioc_range.length)
+		goto out_update;
+	/* -EZUFS_RETRY loops forever; sleeps begin after FALLOC_RETRY tries */
+again:
+	err = zufc_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_range.hdr,
+			    NULL, 0);
+	if (unlikely(err)) {
+		if (err == -EZUFS_RETRY) {
+			if (FALLOC_RETRY < retry++) {
+				zuf_dbg_err("[%ld] retry=%d\n",
+					    inode->i_ino, retry);
+				msleep(retry - FALLOC_RETRY);
+			}
+			goto again;
+		}
+		zuf_dbg_err("[%ld] zufc_dispatch failed => %d\n",
+			    inode->i_ino, err);
+	}
+
+out_update:
+	i_size_write(inode, le64_to_cpu(zii->zi->i_size));
+	inode->i_blocks = le64_to_cpu(zii->zi->i_blocks);
+	/* "out" is for early failures: VFS size/blocks are left untouched */
+out:
+	zuf_w_unlock(zii);
+
+	return err;
+}
+
+static loff_t zuf_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_seek ioc_seek = {
+		.hdr.in_len = sizeof(ioc_seek),
+		.hdr.out_len = sizeof(ioc_seek),
+		.hdr.operation = ZUFS_OP_LLSEEK,
+		.zus_ii = zii->zus_ii,
+		.offset_in = offset,
+		.whence = whence,
+	};
+	int err = 0;
+
+	zuf_dbg_vfs("[%ld] offset=0x%llx whence=%d\n",
+		     inode->i_ino, offset, whence);
+
+	if (whence != SEEK_DATA && whence != SEEK_HOLE)
+		return generic_file_llseek(file, offset, whence);
+
+	zuf_r_lock(zii);
+
+	if ((offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) ||
+	    offset > inode->i_sb->s_maxbytes) {
+		err = -EINVAL;
+	} else if (i_size_read(inode) <= offset) {
+		err = -ENXIO;
+	} else if (!inode->i_blocks) {
+		/* No blocks allocated: answered locally, server not asked */
+		if (whence == SEEK_HOLE)
+			ioc_seek.offset_out = i_size_read(inode);
+		else
+			err = -ENXIO;
+	} else {
+		err = zufc_dispatch(ZUF_ROOT(SBI(inode->i_sb)),
+				    &ioc_seek.hdr, NULL, 0);
+		if (unlikely(err))
+			zuf_dbg_err("zufc_dispatch failed => %d\n", err);
+	}
+
+	/*
+	 * Every successful seek must move f_pos, including the block-less
+	 * SEEK_HOLE answer above that never goes to the server.
+	 */
+	if (!err && ioc_seek.offset_out != file->f_pos) {
+		file->f_pos = ioc_seek.offset_out;
+		file->f_version = 0;
+	}
+
+	zuf_r_unlock(zii);
+
+	return err ?: ioc_seek.offset_out;
+}
+
+/* ->flush() hook: only traces the inode number, always reports success */
+static int zuf_flush(struct file *file, fl_owner_t id)
+{
+	zuf_dbg_vfs("[%ld]\n", file->f_inode->i_ino);
+
+	return 0;
+}
+
+static int tozu_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		       u64 offset, u64 len)
+{
+	int err = -EOPNOTSUPP;
+	ulong start_index = md_o2p(offset);
+	ulong end_index = md_o2p_up(offset + len);
+	struct zuf_inode_info *zii = ZUII(inode);
+
+	zuf_dbg_vfs(
+		"[%ld] offset=0x%llx len=0x%llx i-start=0x%lx i-end=0x%lx\n",
+		inode->i_ino, offset, len, start_index, end_index);
+	/* FIEMAP_FLAG_SYNC is the only flag passed as supported */
+	if (fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))
+		return -EBADR;
+
+	zuf_r_lock(zii);
+
+	/* TODO: ZUS fiemap (&msi)*/
+	/* Until then err stays -EOPNOTSUPP and no extent is reported */
+	zuf_r_unlock(zii);
+	return err;
+}
+
+static void _lock_two_ziis(struct zuf_inode_info *zii1,
+			   struct zuf_inode_info *zii2)
+{
+	/* Lock the lower address first so every caller uses the same order */
+	if (zii1 > zii2)
+		swap(zii1, zii2);
+	zuf_w_lock(zii1);
+	if (zii1 != zii2)
+		zuf_w_lock_nested(zii2);
+}
+
+static void _unlock_two_ziis(struct zuf_inode_info *zii1,
+		      struct zuf_inode_info *zii2)
+{
+	/* Order by address, then drop the higher-addressed lock first */
+	if (zii1 > zii2)
+		swap(zii1, zii2);
+	if (zii1 != zii2)
+		zuf_w_unlock(zii2);
+	zuf_w_unlock(zii1);
+}
+
+static int _clone_file_range(struct inode *src_inode, loff_t pos_in,
+			     struct inode *dst_inode, loff_t pos_out,
+			     u64 len, u64 len_up, int operation)
+{
+	struct zuf_inode_info *src_zii = ZUII(src_inode);
+	struct zuf_inode_info *dst_zii = ZUII(dst_inode);
+	struct zus_inode *dst_zi = dst_zii->zi;
+	struct super_block *sb = src_inode->i_sb;
+	struct zufs_ioc_clone ioc_clone = {
+		.hdr.in_len = sizeof(ioc_clone),
+		.hdr.out_len = sizeof(ioc_clone),
+		.hdr.operation = operation,
+		.src_zus_ii = src_zii->zus_ii,
+		.dst_zus_ii = dst_zii->zus_ii,
+		.pos_in = pos_in,
+		.pos_out = pos_out,
+		.len = len,
+		.len_up = len_up,
+	};
+	int err;
+
+	_lock_two_ziis(src_zii, dst_zii);
+
+	/* NOTE: len==0 means to-end-of-file which is what we want */
+	unmap_mapping_range(src_inode->i_mapping, pos_in,  len, 0);
+	unmap_mapping_range(dst_inode->i_mapping, pos_out, len, 0);
+
+	zus_inode_cmtime_now(dst_inode, dst_zi);
+	err = zufc_dispatch(ZUF_ROOT(SBI(sb)), &ioc_clone.hdr, NULL, 0);
+	if (unlikely(err && err != -EINTR)) {
+		zuf_err("failed to clone %ld -> %ld ; err=%d\n",
+			 src_inode->i_ino, dst_inode->i_ino, err);
+		goto out;
+	}
+	/* Success or -EINTR: resync the VFS inode from the (LE) zus_inode */
+	dst_inode->i_blocks = le64_to_cpu(dst_zi->i_blocks);
+	i_size_write(dst_inode, le64_to_cpu(dst_zi->i_size));
+
+out:
+	_unlock_two_ziis(src_zii, dst_zii);
+
+	return err;
+}
+
+static loff_t zuf_clone_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				loff_t len, uint remap_flags)
+{
+	struct inode *src_inode = file_inode(file_in);
+	struct inode *dst_inode = file_inode(file_out);
+	ulong src_size = i_size_read(src_inode);
+	ulong dst_size = i_size_read(dst_inode);
+	struct super_block *sb = src_inode->i_sb;
+	ulong len_up = len;
+	int err;
+
+	zuf_dbg_vfs(
+		"ino-in=%ld ino-out=%ld pos_in=0x%llx pos_out=0x%llx length=0x%llx\n",
+		src_inode->i_ino, dst_inode->i_ino, pos_in, pos_out, len);
+
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
+		return -EINVAL;
+	/* Same inode: equal offsets is a no-op, overlap is -EINVAL */
+	if (src_inode == dst_inode) {
+		if (pos_in == pos_out) {
+			zuf_dbg_err("[%ld] Clone nothing!!\n",
+				src_inode->i_ino);
+			return 0;
+		}
+		if (pos_in < pos_out) {
+			if (pos_in + len > pos_out) {
+				zuf_dbg_err(
+					"[%ld] overlapping pos_in < pos_out?? => EINVAL\n",
+					src_inode->i_ino);
+				return -EINVAL;
+			}
+		} else {
+			if (pos_out + len > pos_in) {
+				zuf_dbg_err("[%ld] overlapping pos_out < pos_in?? => EINVAL\n",
+					src_inode->i_ino);
+				return -EINVAL;
+			}
+		}
+	}
+	/* Both start offsets must sit on a block boundary */
+	if ((pos_in & (sb->s_blocksize - 1)) ||
+	    (pos_out & (sb->s_blocksize - 1))) {
+		zuf_err("[%ld] Not aligned len=0x%llx pos_in=0x%llx "
+			"pos_out=0x%llx src-size=0x%llx dst-size=0x%llx\n",
+			 src_inode->i_ino, len, pos_in, pos_out,
+			 i_size_read(src_inode), i_size_read(dst_inode));
+		return -EINVAL;
+	}
+
+	/* STD says that len==0 means up to end of SRC */
+	if (!len)
+		len_up = len = src_size - pos_in;
+	/* Offset-0 clone of all of src onto a not-bigger dst: len_up = 0 */
+	if (!pos_in && !pos_out && (src_size <= pos_in + len) &&
+	    (dst_size <= src_size)) {
+		len_up = 0;
+	} else if (len & (sb->s_blocksize - 1)) {
+		/* un-aligned len, see if it is beyond EOF */
+		if ((src_size > pos_in  + len) ||
+		    (dst_size > pos_out + len)) {
+			zuf_err("[%ld] Not aligned len=0x%llx pos_in=0x%llx "
+				"pos_out=0x%llx src-size=0x%lx dst-size=0x%lx\n",
+				src_inode->i_ino, len, pos_in, pos_out,
+				src_size, dst_size);
+			return -EINVAL;
+		}
+		len_up = md_p2o(md_o2p_up(len));
+	}
+
+	err = _clone_file_range(src_inode, pos_in, dst_inode, pos_out, len,
+				len_up, ZUFS_OP_CLONE);
+	if (unlikely(err))
+		zuf_err("_clone_file_range failed => %d\n", err);
+
+	return err ? err : len;
+}
+
+static ssize_t zuf_copy_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   size_t len, uint flags)
+{
+	struct inode *src_inode = file_inode(file_in);
+	struct inode *dst_inode = file_inode(file_out);
+	ssize_t ret;
+
+	zuf_dbg_vfs("ino-in=%ld ino-out=%ld pos_in=0x%llx pos_out=0x%llx length=0x%zx\n",
+		    src_inode->i_ino, dst_inode->i_ino, pos_in, pos_out, len);
+	/* Copy is done as a clone; @flags is not consulted */
+	ret = zuf_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+				   REMAP_FILE_ADVISORY);
+	/* A 0 from the clone is reported to the caller as @len */
+	return ret ?: len;
+}
+
+/* ->release() hook: nothing is freed here.
+ * A non-NULL filp->private_data is only reported ("not yet").
+ */
+static int zuf_file_release(struct inode *inode, struct file *filp)
+{
+	if (unlikely(filp->private_data))
+		zuf_err("not yet\n");
+
+	return 0;
+}
+
+static ssize_t zuf_read_iter(struct kiocb *kiocb, struct iov_iter *ii)
+{
+	struct inode *inode = file_inode(kiocb->ki_filp);
+	struct zuf_inode_info *zii = ZUII(inode);
+	ssize_t ret;
+
+	zuf_dbg_vfs("[%ld] ppos=0x%llx len=0x%zx\n",
+		     inode->i_ino, kiocb->ki_pos, iov_iter_count(ii));
+
+	file_accessed(kiocb->ki_filp);
+
+	zuf_r_lock(zii);
+	/* The transfer itself is done by zuf_rw_read_iter() under the lock */
+	ret = zuf_rw_read_iter(inode->i_sb, inode, kiocb, ii);
+
+	zuf_r_unlock(zii);
+	/* ret is an ssize_t, hence the z length modifier */
+	zuf_dbg_vfs("[%ld] => 0x%zx\n", inode->i_ino, ret);
+	return ret;
+}
+
+static ssize_t zuf_write_iter(struct kiocb *kiocb, struct iov_iter *ii)
+{
+	struct inode *inode = file_inode(kiocb->ki_filp);
+	struct zuf_inode_info *zii = ZUII(inode);
+	ssize_t ret;
+
+	ret = generic_write_checks(kiocb, ii);
+	if (unlikely(ret <= 0)) {	/* error, or a zero-length write */
+		zuf_dbg_vfs("[%ld] generic_write_checks => 0x%zx\n",
+			    inode->i_ino, ret);
+		return ret;
+	}
+
+	zuf_r_lock(zii);
+
+	ret = file_remove_privs(kiocb->ki_filp);
+	if (unlikely(ret < 0))
+		goto out;
+
+	zus_inode_cmtime_now(inode, zii->zi);
+
+	ret = zuf_rw_write_iter(inode->i_sb, inode, kiocb, ii);
+	if (unlikely(ret < 0))
+		goto out;
+	/* Never shrink the VFS size here; only follow zi->i_size upwards */
+	if (i_size_read(inode) <= le64_to_cpu(zii->zi->i_size))
+		i_size_write(inode, le64_to_cpu(zii->zi->i_size));
+
+	inode->i_blocks = le64_to_cpu(zii->zi->i_blocks);
+
+out:
+	zuf_r_unlock(zii);
+
+	zuf_dbg_vfs("[%ld] => 0x%zx\n", inode->i_ino, ret);
+	return ret;
+}
+
 const struct file_operations zuf_file_operations = {
+	.llseek			= zuf_llseek,
+	.read_iter		= zuf_read_iter,
+	.write_iter		= zuf_write_iter,
 	.open			= generic_file_open,
+	.flush			= zuf_flush,
+	.release		= zuf_file_release,
+	.fallocate		= zuf_fallocate,
+	.copy_file_range	= zuf_copy_file_range,
+	.remap_file_range	= zuf_clone_file_range,
 };
 
 const struct inode_operations zuf_file_inode_operations = {
 	.setattr	= zuf_setattr,
 	.getattr	= zuf_getattr,
 	.update_time	= zuf_update_time,
+	.fiemap		= tozu_fiemap,
 };
diff --git a/fs/zuf/rw.c b/fs/zuf/rw.c
index 1eb8453da564..335bfd256499 100644
--- a/fs/zuf/rw.c
+++ b/fs/zuf/rw.c
@@ -23,3 +23,15 @@  int zuf_trim_edge(struct inode *inode, ulong filepos, uint len)
 {
 	return -EIO;
 }
+
+ssize_t zuf_rw_read_iter(struct super_block *sb, struct inode *inode,
+			 struct kiocb *kiocb, struct iov_iter *ii)
+{
+	return -EIO;	/* stub: every read fails unconditionally */
+}
+
+ssize_t zuf_rw_write_iter(struct super_block *sb, struct inode *inode,
+			  struct kiocb *kiocb, struct iov_iter *ii)
+{
+	return -EIO;	/* stub: every write fails unconditionally */
+}
diff --git a/fs/zuf/zuf-core.c b/fs/zuf/zuf-core.c
index 3a264e6475c4..96ffc6244daa 100644
--- a/fs/zuf/zuf-core.c
+++ b/fs/zuf/zuf-core.c
@@ -774,8 +774,12 @@  const char *zuf_op_name(enum e_zufs_operation op)
 		CASE_ENUM_NAME(ZUFS_OP_REMOVE_DENTRY	);
 		CASE_ENUM_NAME(ZUFS_OP_RENAME		);
 		CASE_ENUM_NAME(ZUFS_OP_READDIR		);
+		CASE_ENUM_NAME(ZUFS_OP_CLONE		);
+		CASE_ENUM_NAME(ZUFS_OP_COPY		);
 		CASE_ENUM_NAME(ZUFS_OP_GET_SYMLINK	);
 		CASE_ENUM_NAME(ZUFS_OP_SETATTR		);
+		CASE_ENUM_NAME(ZUFS_OP_FALLOCATE	);
+		CASE_ENUM_NAME(ZUFS_OP_LLSEEK		);
 		CASE_ENUM_NAME(ZUFS_OP_BREAK		);
 	default:
 		return "UNKNOWN";
diff --git a/fs/zuf/zus_api.h b/fs/zuf/zus_api.h
index 74f69a12a263..32e8c2cae518 100644
--- a/fs/zuf/zus_api.h
+++ b/fs/zuf/zus_api.h
@@ -337,9 +337,13 @@  enum e_zufs_operation {
 	ZUFS_OP_REMOVE_DENTRY,
 	ZUFS_OP_RENAME,
 	ZUFS_OP_READDIR,
+	ZUFS_OP_CLONE,
+	ZUFS_OP_COPY,
 
 	ZUFS_OP_GET_SYMLINK,
 	ZUFS_OP_SETATTR,
+	ZUFS_OP_FALLOCATE,
+	ZUFS_OP_LLSEEK,
 
 	ZUFS_OP_BREAK,		/* Kernel telling Server to exit */
 	ZUFS_OP_MAX_OPT,
@@ -528,6 +532,47 @@  struct zufs_ioc_attr {
 	__u32 pad;
 };
 
+enum ZUFS_RANGE_FLAGS {
+	ZUFS_RF_DONTNEED		= 0x00000001,
+};
+
+/* ZUFS_OP_ISYNC, ZUFS_OP_FALLOCATE */
+struct zufs_ioc_range {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 offset, length;	/* byte range the operation applies to */
+	__u32 opflags;		/* fallocate: the caller's mode bits */
+	__u32 ioc_flags;
+
+	/* OUT */
+	__u64 write_unmapped;
+};
+
+/* ZUFS_OP_CLONE */
+struct zufs_ioc_clone {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *src_zus_ii;
+	struct zus_inode_info *dst_zus_ii;
+	__u64 pos_in, pos_out;	/* start offsets in source and destination */
+	__u64 len;		/* length requested by the caller */
+	__u64 len_up;		/* len rounded up by the caller, or 0 */
+};
+
+/* ZUFS_OP_LLSEEK */
+struct zufs_ioc_seek {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 offset_in;	/* offset the seek starts from */
+	__u32 whence;		/* sent for SEEK_DATA / SEEK_HOLE only */
+	__u32 pad;
+
+	/* OUT */
+	__u64 offset_out;	/* becomes the new file position */
+};
+
 /* Allocate a special_file that will be a dual-port communication buffer with
  * user mode.
  * Server will access the buffer via the mmap of this file.