@@ -922,7 +922,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
}
#ifdef CONFIG_BLK_DEV_ZONED
-static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
+bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct block_device *bdev = sbi->sb->s_bdev;
int devi = 0;
@@ -4207,6 +4207,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct iomap *srcmap)
{
struct f2fs_map_blocks map = {};
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t next_pgofs = 0;
int err;
@@ -4218,6 +4219,18 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (flags & IOMAP_WRITE)
map.m_may_create = true;
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) {
+ struct f2fs_rwsem *io_order_lock =
+ &sbi->io_order_lock[map.m_seg_type];
+
+ f2fs_down_write(io_order_lock);
+
+ /* set io order lock */
+ iomap->private = io_order_lock;
+ } else {
+ iomap->private = NULL;
+ }
+
err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
if (err)
return err;
@@ -4273,6 +4286,19 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return 0;
}
+static int f2fs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned int flags, struct iomap *iomap)
+{
+ struct f2fs_rwsem *io_order_lock = iomap->private;
+
+ /* ordered write */
+ if (io_order_lock)
+ f2fs_up_write(io_order_lock);
+
+ return 0;
+}
+
const struct iomap_ops f2fs_iomap_ops = {
.iomap_begin = f2fs_iomap_begin,
+ .iomap_end = f2fs_iomap_end,
};
@@ -1582,8 +1582,8 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
- /* keep migration IO order for LFS mode */
- struct f2fs_rwsem io_order_lock;
+ /* keep IO order for LFS mode */
+ struct f2fs_rwsem io_order_lock[NR_CURSEG_DATA_TYPE];
pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */
int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */
@@ -3863,6 +3863,9 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
int f2fs_merge_page_bio(struct f2fs_io_info *fio);
+#ifdef CONFIG_BLK_DEV_ZONED
+bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr);
+#endif
void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, sector_t *sector);
@@ -869,13 +869,7 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
/* disallow direct IO if any of devices has unaligned blksize */
if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
return true;
- /*
- * for blkzoned device, fallback direct IO to buffered IO, so
- * all IOs can be serialized by log-structured write.
- */
- if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
- !f2fs_is_pinned_file(inode))
- return true;
+
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
return true;
@@ -4815,6 +4809,17 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
return 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static void f2fs_dio_zone_write_end_io(struct bio *bio)
+{
+ struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
+
+ bio->bi_private = io->bi_private;
+ complete(&io->zone_wait);
+ iomap_dio_bio_end_io(bio);
+}
+#endif
+
static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
struct bio *bio, loff_t file_offset)
{
@@ -4824,6 +4829,31 @@ static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
enum temp_type temp = f2fs_get_segment_temp(seg_type);
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode)) {
+ struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
+ block_t last_blkaddr = SECTOR_TO_BLOCK(bio_end_sector(bio) - 1);
+
+ f2fs_down_write(&io->io_rwsem);
+ if (io->zone_pending_bio) {
+ wait_for_completion_io(&io->zone_wait);
+ bio_put(io->zone_pending_bio);
+ io->zone_pending_bio = NULL;
+ io->bi_private = NULL;
+ }
+
+ if (is_end_zone_blkaddr(sbi, last_blkaddr)) {
+ bio_get(bio);
+ reinit_completion(&io->zone_wait);
+ io->bi_private = bio->bi_private;
+ bio->bi_private = io;
+ bio->bi_end_io = f2fs_dio_zone_write_end_io;
+ io->zone_pending_bio = bio;
+ }
+ f2fs_up_write(&io->io_rwsem);
+ }
+#endif
submit_bio(bio);
}
@@ -4897,6 +4927,10 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
dio_flags = 0;
if (pos + count > inode->i_size)
dio_flags |= IOMAP_DIO_FORCE_WAIT;
+
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_is_pinned_file(inode))
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+
dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
&f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
if (IS_ERR_OR_NULL(dio)) {
@@ -1361,7 +1361,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
if (lfs_mode)
- f2fs_down_write(&fio.sbi->io_order_lock);
+ f2fs_down_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]);
mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
fio.old_blkaddr, false);
@@ -1444,7 +1444,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
true, true, true);
up_out:
if (lfs_mode)
- f2fs_up_write(&fio.sbi->io_order_lock);
+ f2fs_up_write(&fio.sbi->io_order_lock[CURSEG_COLD_DATA]);
put_out:
f2fs_put_dnode(&dn);
out:
@@ -3796,10 +3796,10 @@ void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio);
- bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
+ bool keep_order = (f2fs_lfs_mode(fio->sbi) && type <= CURSEG_COLD_DATA);
if (keep_order)
- f2fs_down_read(&fio->sbi->io_order_lock);
+ f2fs_down_read(&fio->sbi->io_order_lock[type]);
if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio)) {
@@ -3819,7 +3819,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
out:
if (keep_order)
- f2fs_up_read(&fio->sbi->io_order_lock);
+ f2fs_up_read(&fio->sbi->io_order_lock[type]);
}
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
@@ -3833,7 +3833,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
- init_f2fs_rwsem(&sbi->io_order_lock);
+
+ for (i = 0; i < NR_CURSEG_DATA_TYPE; i++)
+ init_f2fs_rwsem(&sbi->io_order_lock[i]);
+
spin_lock_init(&sbi->cp_lock);
sbi->dirty_device = 0;
With zoned storage, F2FS avoids direct IO writes and uses buffered writes with page cache flushes to prevent unaligned writes. However, the unaligned write can be avoided by allowing only a single thread per zone to perform direct writes. To achieve direct writes in zoned storage, F2FS uses per-zone semaphores to serialize block allocation and writes. Signed-off-by: Daejun Park <daejun7.park@samsung.com> --- fs/f2fs/data.c | 28 ++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 7 +++++-- fs/f2fs/file.c | 48 ++++++++++++++++++++++++++++++++++++++++------- fs/f2fs/gc.c | 4 ++-- fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 5 ++++- 6 files changed, 82 insertions(+), 16 deletions(-)