diff mbox

[RFC,v2,72/83] File operation: fallocate.

Message ID 1520705944-6723-73-git-send-email-jix024@eng.ucsd.edu (mailing list archive)
State Changes Requested
Headers show

Commit Message

Andiry Xu March 10, 2018, 6:18 p.m. UTC
From: Andiry Xu <jix024@cs.ucsd.edu>

Fallocate works similarly to writes, allocating zeroed blocks
for the holes in the requested region.

Signed-off-by: Andiry Xu <jix024@cs.ucsd.edu>
---
 fs/nova/file.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nova/nova.h |   5 ++
 2 files changed, 153 insertions(+)
diff mbox

Patch

diff --git a/fs/nova/file.c b/fs/nova/file.c
index b94a9a3..a6b5bd3 100644
--- a/fs/nova/file.c
+++ b/fs/nova/file.c
@@ -113,6 +113,153 @@  static int nova_open(struct inode *inode, struct file *filp)
 	return generic_file_open(inode, filp);
 }
 
+/* Allocate zeroed blocks for the holes in [offset, offset + len). */
+static long nova_fallocate(struct file *file, int mode, loff_t offset,
+	loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_inode *pi;
+	struct nova_file_write_entry *entry;
+	struct nova_file_write_item *entry_item;
+	struct list_head item_head;
+	unsigned long start_blk, num_blocks, ent_blks = 0;
+	unsigned long total_blocks = 0, blocknr = 0;
+	unsigned long blockoff;
+	loff_t new_size;
+	long ret = 0;
+	int inplace = 0;
+	int blocksize_mask;
+	int allocated = 0;
+	timing_t fallocate_time;
+	u64 epoch_id;
+	u32 time;
+
+	/*
+	 * Fallocate does not make much sense for CoW,
+	 * but we still support it for DAX-mmap purpose.
+	 */
+
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	if (S_ISDIR(inode->i_mode))
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&item_head);
+	new_size = len + offset;
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && new_size > inode->i_size) {
+		ret = inode_newsize_ok(inode, new_size);
+		if (ret)
+			return ret;
+	} else {
+		new_size = inode->i_size;
+	}
+
+	nova_dbgv("%s: inode %lu, offset %lld, count %lld, mode 0x%x\n",
+			__func__, inode->i_ino,	offset, len, mode);
+
+	NOVA_START_TIMING(fallocate_t, fallocate_time);
+	inode_lock(inode);
+	sih_lock(sih);
+
+	pi = nova_get_inode(sb, inode);
+	if (!pi) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	inode->i_mtime = inode->i_ctime = current_time(inode);
+	time = current_time(inode).tv_sec;
+
+	blocksize_mask = sb->s_blocksize - 1;
+	start_blk = offset >> sb->s_blocksize_bits;
+	blockoff = offset & blocksize_mask;
+	num_blocks = (blockoff + len + blocksize_mask) >> sb->s_blocksize_bits;
+
+	epoch_id = nova_get_epoch_id(sb);
+	while (num_blocks > 0) {
+		ent_blks = nova_check_existing_entry(sb, inode, num_blocks,
+						start_blk, &entry,
+						1, epoch_id, &inplace);
+
+		if (entry && inplace) {
+			if (entry->size < new_size) {
+				/* Update existing entry */
+				entry->size = new_size;
+				nova_persist_entry(entry);
+			}
+			allocated = ent_blks;
+			goto next;
+		}
+
+		/* Allocate zeroed blocks to fill hole */
+		allocated = nova_new_data_blocks(sb, sih, &blocknr, start_blk,
+				 ent_blks, ALLOC_INIT_ZERO, ANY_CPU,
+				 ALLOC_FROM_HEAD);
+		nova_dbgv("%s: alloc %d blocks @ %lu\n", __func__,
+						allocated, blocknr);
+
+		if (allocated <= 0) {
+			nova_dbg("%s alloc %lu blocks failed!, %d\n",
+						__func__, ent_blks, allocated);
+			/* 0 blocks is a failure too; never report success */
+			ret = allocated ? allocated : -ENOSPC;
+			goto out;
+		}
+
+		entry_item = nova_alloc_file_write_item(sb);
+		if (!entry_item) {
+			/* NOTE(review): blocknr is not on item_head; may leak */
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/* Handle hole fill write */
+		nova_init_file_write_item(sb, sih, entry_item, epoch_id,
+					start_blk, allocated, blocknr,
+					time, new_size);
+
+		list_add_tail(&entry_item->list, &item_head);
+
+		total_blocks += allocated;
+next:
+		num_blocks -= allocated;
+		start_blk += allocated;
+	}
+
+	ret = nova_commit_writes_to_log(sb, pi, inode,
+					&item_head, total_blocks, 1);
+	if (ret < 0) {
+		nova_err(sb, "commit to log failed\n");
+		goto out;
+	}
+
+	if (ret || (mode & FALLOC_FL_KEEP_SIZE)) {
+		pi->i_flags |= cpu_to_le32(NOVA_EOFBLOCKS_FL);
+		sih->i_flags |= cpu_to_le32(NOVA_EOFBLOCKS_FL);
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && new_size > inode->i_size) {
+		inode->i_size = new_size;
+		sih->i_size = new_size;
+	}
+
+	nova_persist_inode(pi);
+
+out:
+	if (ret < 0)
+		nova_cleanup_incomplete_write(sb, sih, &item_head, 1);
+
+	sih_unlock(sih);
+	inode_unlock(inode);
+	NOVA_END_TIMING(fallocate_t, fallocate_time);
+	return ret;
+}
+
 static ssize_t
 do_dax_mapping_read(struct file *filp, char __user *buf,
 	size_t len, loff_t *ppos)
@@ -477,6 +624,7 @@  const struct file_operations nova_dax_file_operations = {
 	.open		= nova_open,
 	.fsync		= nova_fsync,
 	.flush		= nova_flush,
+	.fallocate	= nova_fallocate,
 };
 
 const struct inode_operations nova_file_inode_operations = {
diff --git a/fs/nova/nova.h b/fs/nova/nova.h
index 6392bb3..ab9e8f3 100644
--- a/fs/nova/nova.h
+++ b/fs/nova/nova.h
@@ -477,6 +477,11 @@  void nova_init_file_write_item(struct super_block *sb,
 	struct nova_inode_info_header *sih, struct nova_file_write_item *item,
 	u64 epoch_id, u64 pgoff, int num_pages, u64 blocknr, u32 time,
 	u64 file_size);
+unsigned long nova_check_existing_entry(struct super_block *sb,
+	struct inode *inode, unsigned long num_blocks, unsigned long start_blk,
+	struct nova_file_write_entry **ret_entry,
+	int check_next, u64 epoch_id,
+	int *inplace);
 ssize_t nova_inplace_file_write(struct file *filp, const char __user *buf,
 	size_t len, loff_t *ppos);
 ssize_t do_nova_inplace_file_write(struct file *filp, const char __user *buf,