@@ -1363,11 +1363,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
f2fs_invalidate_compress_page(sbi, old_blkaddr);
}
f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
- /*
- * i_size will be updated by direct_IO. Otherwise, we'll get stale
- * data from unwritten block via dio_read.
- */
return 0;
}
@@ -3130,7 +3125,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
FS_CP_DATA_IO : FS_DATA_IO);
}
-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
{
loff_t i_size = i_size_read(inode);
@@ -3238,10 +3238,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_lock(&sbi->iostat_lock);
sbi->rw_iostat[type] += io_bytes;
- if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->rw_iostat[APP_BUFFERED_IO] =
- sbi->rw_iostat[APP_WRITE_IO] -
- sbi->rw_iostat[APP_DIRECT_IO];
+ if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+ sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
sbi->rw_iostat[APP_READ_IO] += io_bytes;
@@ -3625,6 +3623,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
struct writeback_control *wbc,
enum iostat_type io_type,
int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
@@ -4292,6 +4292,29 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
+/*
+ * Validate a write request before any data is transferred.
+ *
+ * Returns -EPERM if the inode is immutable or has FI_COMPRESS_RELEASED set.
+ * Otherwise returns whatever generic_write_checks() returns when that is <= 0,
+ * the error from file_modified() if it fails, or else the positive count that
+ * generic_write_checks() produced.
+ */
+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t count;
+ int err;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+ return -EPERM;
+
+ count = generic_write_checks(iocb, from);
+ if (count <= 0)
+ return count;
+
+ /* Only reached when there is something to write (count > 0). */
+ err = file_modified(file);
+ if (err)
+ return err;
+ return count;
+}
+
/*
* Preallocate blocks for a write request, if it is possible and helpful to do
* so. Returns a positive number if blocks may have been preallocated, 0 if no
@@ -4299,15 +4322,14 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
* seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
* requested blocks (not just some of them) have been allocated.
*/
-static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+ bool dio)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
const loff_t pos = iocb->ki_pos;
const size_t count = iov_iter_count(iter);
struct f2fs_map_blocks map = {};
- bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
- !f2fs_force_buffered_io(inode, iocb, iter);
int flag;
int ret;
@@ -4352,13 +4374,153 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
return map.m_len;
}
-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+/*
+ * Perform a buffered write of @from at iocb->ki_pos via
+ * generic_perform_write().
+ *
+ * IOCB_NOWAIT requests are rejected with -EOPNOTSUPP.  On a positive result,
+ * iocb->ki_pos is advanced by the number of bytes written and that amount is
+ * accounted as APP_BUFFERED_IO.  Returns generic_perform_write()'s result
+ * unchanged.
+ */
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ /* current->backing_dev_info is set only for the duration of the write. */
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = generic_perform_write(file, from, iocb->ki_pos);
+ current->backing_dev_info = NULL;
+
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+ }
+ return ret;
+}
+
+/*
+ * Perform a direct write of @from at iocb->ki_pos through iomap_dio_rw(),
+ * falling back to a buffered write for whatever part of @from the direct
+ * write did not consume.
+ *
+ * @may_need_sync is set to false only when the direct write consumed all of
+ * @from; it is left untouched on every other path.
+ *
+ * Returns the number of bytes written (direct part plus any buffered
+ * fallback part), or a negative error code.  If the buffered fallback fails,
+ * the byte count of the direct part alone is returned.
+ */
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ bool *may_need_sync)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const bool do_opu = f2fs_lfs_mode(sbi);
+ const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ const loff_t pos = iocb->ki_pos;
+ const ssize_t count = iov_iter_count(from);
+ const enum rw_hint hint = iocb->ki_hint;
+ unsigned int dio_flags = 0;
+ ssize_t ret;
+
+ trace_f2fs_direct_IO_enter(inode, pos, count, WRITE);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* f2fs_convert_inline_inode() and block allocation can block */
+ if (f2fs_has_inline_data(inode) ||
+ !f2fs_overwrite_io(inode, pos, count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out;
+ }
+
+ /*
+ * Take i_gc_rwsem[WRITE] for reading, plus i_gc_rwsem[READ] when do_opu
+ * is set.  NOWAIT callers only try-lock and get -EAGAIN on contention.
+ */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[WRITE]);
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[WRITE]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+
+ /* With whint_mode off, submit without a hint; restored after the I/O. */
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+ /* Request IOMAP_DIO_FORCE_WAIT when the write ends past current i_size. */
+ if (pos + count > inode->i_size)
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+ ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops,
+ dio_flags);
+ /*
+ * -ENOTBLK is turned into "0 bytes written" rather than an error, so any
+ * data still left in @from is handled by the buffered fallback below.
+ */
+ if (ret == -ENOTBLK)
+ ret = 0;
+
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = hint;
+
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ up_read(&fi->i_gc_rwsem[WRITE]);
+
+ if (ret < 0) {
+ /* Account the bytes already consumed from @from as direct I/O. */
+ if (ret == -EIOCBQUEUED)
+ f2fs_update_iostat(sbi, APP_DIRECT_IO,
+ count - iov_iter_count(from));
+ goto out;
+ }
+ if (pos + ret > inode->i_size)
+ f2fs_i_size_write(inode, pos + ret);
+ f2fs_update_iostat(sbi, APP_DIRECT_IO, ret);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+
+ if (iov_iter_count(from)) {
+ ssize_t ret2;
+ loff_t bufio_start_pos = iocb->ki_pos;
+
+ /*
+ * The direct write was partial, so we need to fall back to a
+ * buffered write for the remainder.
+ */
+
+ ret2 = f2fs_buffered_write_iter(iocb, from);
+ if (iov_iter_count(from))
+ f2fs_write_failed(inode, iocb->ki_pos);
+ if (ret2 < 0)
+ goto out;
+
+ /*
+ * Ensure that the pagecache pages are written to disk and
+ * invalidated to preserve the expected O_DIRECT semantics.
+ */
+ if (ret2 > 0) {
+ loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+ ret += ret2;
+
+ ret2 = filemap_write_and_wait_range(file->f_mapping,
+ bufio_start_pos,
+ bufio_end_pos);
+ if (ret2 < 0)
+ goto out;
+ invalidate_mapping_pages(file->f_mapping,
+ bufio_start_pos >> PAGE_SHIFT,
+ bufio_end_pos >> PAGE_SHIFT);
+ }
+ } else {
+ /* iomap_dio_rw() already handled the generic_write_sync(). */
+ *may_need_sync = false;
+ }
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
const loff_t orig_pos = iocb->ki_pos;
const size_t orig_count = iov_iter_count(from);
loff_t target_size;
+ bool dio;
+ bool may_need_sync = true;
int preallocated;
ssize_t ret;
@@ -4381,48 +4543,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}
- if (unlikely(IS_IMMUTABLE(inode))) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- ret = generic_write_checks(iocb, from);
+ ret = f2fs_write_checks(iocb, from);
if (ret <= 0)
goto out_unlock;
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
- iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, iocb, from)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
- }
- if (iocb->ki_flags & IOCB_DIRECT) {
- /*
- * Convert inline data for Direct I/O before entering
- * f2fs_direct_IO().
- */
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- goto out_unlock;
- }
+ /* Determine whether we will do a direct write or a buffered write. */
+ dio = f2fs_should_use_dio(inode, iocb, from);
/* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
- preallocated = f2fs_preallocate_blocks(iocb, from);
+ preallocated = f2fs_preallocate_blocks(iocb, from, dio);
if (preallocated < 0) {
ret = preallocated;
goto out_unlock;
}
- ret = __generic_file_write_iter(iocb, from);
+ /* Do the actual write. */
+ if (dio)
+ ret = f2fs_dio_write_iter(iocb, from, &may_need_sync);
+ else
+ ret = f2fs_buffered_write_iter(iocb, from);
/* Don't leave any preallocated blocks around past i_size. */
if (preallocated > 0 && inode->i_size < target_size) {
@@ -4433,14 +4573,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
clear_inode_flag(inode, FI_PREALLOCATED_ALL);
-
- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
out_unlock:
inode_unlock(inode);
out:
trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
- if (ret > 0)
+ if (ret > 0 && may_need_sync)
ret = generic_write_sync(iocb, ret);
return ret;
}