Message ID | 20210716143919.44373-7-ebiggers@kernel.org (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | f2fs: use iomap for direct I/O | expand |
On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote: > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > + struct bio *bio, loff_t file_offset) > +{ > + struct f2fs_private_dio *dio; > + bool write = (bio_op(bio) == REQ_OP_WRITE); > + > + dio = f2fs_kzalloc(F2FS_I_SB(inode), > + sizeof(struct f2fs_private_dio), GFP_NOFS); > + if (!dio) > + goto out; > + > + dio->inode = inode; > + dio->orig_end_io = bio->bi_end_io; > + dio->orig_private = bio->bi_private; > + dio->write = write; > + > + bio->bi_end_io = f2fs_dio_end_io; > + bio->bi_private = dio; > + > + inc_page_count(F2FS_I_SB(inode), > + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > + > + return submit_bio(bio); I don't think there is any need for this mess. The F2FS_DIO_WRITE / F2FS_DIO_READ counts are only used to check if there is any inflight I/O at all. So instead we can increment them once before calling iomap_dio_rw, and decrement them in ->end_io or for a failure/noop exit from iomap_dio_rw. Untested patch below. Note that all this would be much simpler to review if the last three patches were folded into a single one. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4fbf28f5aaab..9f9cc49fbe94 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3369,50 +3369,6 @@ static int f2fs_write_end(struct file *file, return copied; } -static void f2fs_dio_end_io(struct bio *bio) -{ - struct f2fs_private_dio *dio = bio->bi_private; - - dec_page_count(F2FS_I_SB(dio->inode), - dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); - - bio->bi_private = dio->orig_private; - bio->bi_end_io = dio->orig_end_io; - - kfree(dio); - - bio_endio(bio); -} - -static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, - struct bio *bio, loff_t file_offset) -{ - struct f2fs_private_dio *dio; - bool write = (bio_op(bio) == REQ_OP_WRITE); - - dio = f2fs_kzalloc(F2FS_I_SB(inode), - sizeof(struct f2fs_private_dio), GFP_NOFS); - if (!dio) - goto out; - - dio->inode = inode; - dio->orig_end_io = bio->bi_end_io; - dio->orig_private = bio->bi_private; - dio->write = write; - - bio->bi_end_io = f2fs_dio_end_io; - bio->bi_private = dio; - - inc_page_count(F2FS_I_SB(inode), - write ? F2FS_DIO_WRITE : F2FS_DIO_READ); - - return submit_bio(bio); -out: - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); - return BLK_QC_T_NONE; -} - void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length) { @@ -4006,6 +3962,18 @@ const struct iomap_ops f2fs_iomap_ops = { .iomap_begin = f2fs_iomap_begin, }; +static int f2fs_dio_end_io(struct kiocb *iocb, ssize_t size, int error, + unsigned flags) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); + + if (iocb->ki_flags & IOCB_WRITE) + dec_page_count(sbi, F2FS_DIO_WRITE); + else + dec_page_count(sbi, F2FS_DIO_READ); + return 0; +} + const struct iomap_dio_ops f2fs_iomap_dio_ops = { - .submit_io = f2fs_dio_submit_bio, + .end_io = f2fs_dio_end_io, }; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6dbbac05a15c..abd521dc504a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1750,13 +1750,6 @@ struct f2fs_sb_info { #endif }; -struct f2fs_private_dio { - struct inode *inode; - void *orig_private; - bio_end_io_t *orig_end_io; - bool write; -}; - #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(sbi, type) \ printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6b8eac6b25d4..4fed90cc1462 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4259,6 +4259,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) down_read(&fi->i_gc_rwsem[READ]); } + inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); up_read(&fi->i_gc_rwsem[READ]); @@ -4270,6 +4271,8 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) else if (ret == -EIOCBQUEUED) f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO, count - iov_iter_count(to)); + else + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); out: trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); return ret; @@ -4446,6 +4449,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, if (pos + count > inode->i_size) dio_flags |= IOMAP_DIO_FORCE_WAIT; + inc_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE); ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, dio_flags); if (ret == -ENOTBLK) @@ -4459,6 +4463,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, up_read(&fi->i_gc_rwsem[WRITE]); + if (ret <= 0 && ret != -EIOCBQUEUED) + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE); + if (ret < 0) { if (ret == -EIOCBQUEUED) f2fs_update_iostat(sbi, APP_DIRECT_IO,
On 07/19, Christoph Hellwig wrote: > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote: > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > > + struct bio *bio, loff_t file_offset) > > +{ > > + struct f2fs_private_dio *dio; > > + bool write = (bio_op(bio) == REQ_OP_WRITE); > > + > > + dio = f2fs_kzalloc(F2FS_I_SB(inode), > > + sizeof(struct f2fs_private_dio), GFP_NOFS); > > + if (!dio) > > + goto out; > > + > > + dio->inode = inode; > > + dio->orig_end_io = bio->bi_end_io; > > + dio->orig_private = bio->bi_private; > > + dio->write = write; > > + > > + bio->bi_end_io = f2fs_dio_end_io; > > + bio->bi_private = dio; > > + > > + inc_page_count(F2FS_I_SB(inode), > > + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > + > > + return submit_bio(bio); > > I don't think there is any need for this mess. The F2FS_DIO_WRITE / > F2FS_DIO_READ counts are only used to check if there is any inflight > I/O at all. So instead we can increment them once before calling > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop > exit from iomap_dio_rw. Untested patch below. Note that all this > would be much simpler to review if the last three patches were folded > into a single one. Eric, wdyt? I've merged v1 to v5, including Christoph's comment in v2. > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index 4fbf28f5aaab..9f9cc49fbe94 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3369,50 +3369,6 @@ static int f2fs_write_end(struct file *file, > return copied; > } > > -static void f2fs_dio_end_io(struct bio *bio) > -{ > - struct f2fs_private_dio *dio = bio->bi_private; > - > - dec_page_count(F2FS_I_SB(dio->inode), > - dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > - > - bio->bi_private = dio->orig_private; > - bio->bi_end_io = dio->orig_end_io; > - > - kfree(dio); > - > - bio_endio(bio); > -} > - > -static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > - struct bio *bio, loff_t file_offset) > -{ > - struct f2fs_private_dio *dio; > - bool write = (bio_op(bio) == REQ_OP_WRITE); > - > - dio = f2fs_kzalloc(F2FS_I_SB(inode), > - sizeof(struct f2fs_private_dio), GFP_NOFS); > - if (!dio) > - goto out; > - > - dio->inode = inode; > - dio->orig_end_io = bio->bi_end_io; > - dio->orig_private = bio->bi_private; > - dio->write = write; > - > - bio->bi_end_io = f2fs_dio_end_io; > - bio->bi_private = dio; > - > - inc_page_count(F2FS_I_SB(inode), > - write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > - > - return submit_bio(bio); > -out: > - bio->bi_status = BLK_STS_IOERR; > - bio_endio(bio); > - return BLK_QC_T_NONE; > -} > - > void f2fs_invalidate_page(struct page *page, unsigned int offset, > unsigned int length) > { > @@ -4006,6 +3962,18 @@ const struct iomap_ops f2fs_iomap_ops = { > .iomap_begin = f2fs_iomap_begin, > }; > > +static int f2fs_dio_end_io(struct kiocb *iocb, ssize_t size, int error, > + unsigned flags) > +{ > + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); > + > + if (iocb->ki_flags & IOCB_WRITE) > + dec_page_count(sbi, F2FS_DIO_WRITE); > + else > + dec_page_count(sbi, F2FS_DIO_READ); > + return 0; > +} > + > const struct iomap_dio_ops f2fs_iomap_dio_ops = { > - .submit_io = f2fs_dio_submit_bio, > + .end_io = f2fs_dio_end_io, > }; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 6dbbac05a15c..abd521dc504a 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -1750,13 +1750,6 @@ struct f2fs_sb_info { > #endif > }; > > -struct f2fs_private_dio { > - struct inode *inode; > - void *orig_private; > - bio_end_io_t *orig_end_io; > - bool write; > -}; > - > #ifdef CONFIG_F2FS_FAULT_INJECTION > #define f2fs_show_injection_info(sbi, type) \ > printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 6b8eac6b25d4..4fed90cc1462 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4259,6 +4259,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > down_read(&fi->i_gc_rwsem[READ]); > } > > + inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); > > up_read(&fi->i_gc_rwsem[READ]); > @@ -4270,6 +4271,8 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > else if (ret == -EIOCBQUEUED) > f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO, > count - iov_iter_count(to)); > + else > + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > out: > trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); > return ret; > @@ -4446,6 +4449,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > > if (pos + count > inode->i_size) > dio_flags |= IOMAP_DIO_FORCE_WAIT; > + inc_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE); > ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, > dio_flags); > if (ret == -ENOTBLK) > @@ -4459,6 +4463,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > > up_read(&fi->i_gc_rwsem[WRITE]); > > + if (ret <= 0 && ret != -EIOCBQUEUED) > + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE); > + > if (ret < 0) { > if (ret == -EIOCBQUEUED) > f2fs_update_iostat(sbi, APP_DIRECT_IO,
On 07/22, Jaegeuk Kim wrote: > On 07/19, Christoph Hellwig wrote: > > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote: > > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > > > + struct bio *bio, loff_t file_offset) > > > +{ > > > + struct f2fs_private_dio *dio; > > > + bool write = (bio_op(bio) == REQ_OP_WRITE); > > > + > > > + dio = f2fs_kzalloc(F2FS_I_SB(inode), > > > + sizeof(struct f2fs_private_dio), GFP_NOFS); > > > + if (!dio) > > > + goto out; > > > + > > > + dio->inode = inode; > > > + dio->orig_end_io = bio->bi_end_io; > > > + dio->orig_private = bio->bi_private; > > > + dio->write = write; > > > + > > > + bio->bi_end_io = f2fs_dio_end_io; > > > + bio->bi_private = dio; > > > + > > > + inc_page_count(F2FS_I_SB(inode), > > > + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > > + > > > + return submit_bio(bio); > > > > I don't think there is any need for this mess. The F2FS_DIO_WRITE / > > F2FS_DIO_READ counts are only used to check if there is any inflight > > I/O at all. So instead we can increment them once before calling > > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop > > exit from iomap_dio_rw. Untested patch below. Note that all this > > would be much simpler to review if the last three patches were folded > > into a single one. > > Eric, wdyt? > > I've merged v1 to v5, including Christoph's comment in v2. Sorry, I mean patch #1 to #5. You can find them in: https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/log/?h=dev > > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > > index 4fbf28f5aaab..9f9cc49fbe94 100644 > > --- a/fs/f2fs/data.c > > +++ b/fs/f2fs/data.c > > @@ -3369,50 +3369,6 @@ static int f2fs_write_end(struct file *file, > > return copied; > > } > > > > -static void f2fs_dio_end_io(struct bio *bio) > > -{ > > - struct f2fs_private_dio *dio = bio->bi_private; > > - > > - dec_page_count(F2FS_I_SB(dio->inode), > > - dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > - > > - bio->bi_private = dio->orig_private; > > - bio->bi_end_io = dio->orig_end_io; > > - > > - kfree(dio); > > - > > - bio_endio(bio); > > -} > > - > > -static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > > - struct bio *bio, loff_t file_offset) > > -{ > > - struct f2fs_private_dio *dio; > > - bool write = (bio_op(bio) == REQ_OP_WRITE); > > - > > - dio = f2fs_kzalloc(F2FS_I_SB(inode), > > - sizeof(struct f2fs_private_dio), GFP_NOFS); > > - if (!dio) > > - goto out; > > - > > - dio->inode = inode; > > - dio->orig_end_io = bio->bi_end_io; > > - dio->orig_private = bio->bi_private; > > - dio->write = write; > > - > > - bio->bi_end_io = f2fs_dio_end_io; > > - bio->bi_private = dio; > > - > > - inc_page_count(F2FS_I_SB(inode), > > - write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > - > > - return submit_bio(bio); > > -out: > > - bio->bi_status = BLK_STS_IOERR; > > - bio_endio(bio); > > - return BLK_QC_T_NONE; > > -} > > - > > void f2fs_invalidate_page(struct page *page, unsigned int offset, > > unsigned int length) > > { > > @@ -4006,6 +3962,18 @@ const struct iomap_ops f2fs_iomap_ops = { > > .iomap_begin = f2fs_iomap_begin, > > }; > > > > +static int f2fs_dio_end_io(struct kiocb *iocb, ssize_t size, int error, > > + unsigned flags) > > +{ > > + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); > > + > > + if (iocb->ki_flags & IOCB_WRITE) > > + dec_page_count(sbi, F2FS_DIO_WRITE); > > + else > > + dec_page_count(sbi, F2FS_DIO_READ); > > + return 0; > > +} > > + > > const struct iomap_dio_ops f2fs_iomap_dio_ops = { > > - .submit_io = f2fs_dio_submit_bio, > > + .end_io = f2fs_dio_end_io, > > }; > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index 6dbbac05a15c..abd521dc504a 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -1750,13 +1750,6 @@ struct f2fs_sb_info { > > #endif > > }; > > > > -struct f2fs_private_dio { > > - struct inode *inode; > > - void *orig_private; > > - bio_end_io_t *orig_end_io; > > - bool write; > > -}; > > - > > #ifdef CONFIG_F2FS_FAULT_INJECTION > > #define f2fs_show_injection_info(sbi, type) \ > > printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > > index 6b8eac6b25d4..4fed90cc1462 100644 > > --- a/fs/f2fs/file.c > > +++ b/fs/f2fs/file.c > > @@ -4259,6 +4259,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > > down_read(&fi->i_gc_rwsem[READ]); > > } > > > > + inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > > ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); > > > > up_read(&fi->i_gc_rwsem[READ]); > > @@ -4270,6 +4271,8 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > > else if (ret == -EIOCBQUEUED) > > f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO, > > count - iov_iter_count(to)); > > + else > > + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > > out: > > trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); > > return ret; > > @@ -4446,6 +4449,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > > > > if (pos + count > inode->i_size) > > dio_flags |= IOMAP_DIO_FORCE_WAIT; > > + inc_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE); > > ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, > > dio_flags); > > if (ret == -ENOTBLK) > > @@ -4459,6 +4463,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > > > > up_read(&fi->i_gc_rwsem[WRITE]); > > > > + if (ret <= 0 && ret != -EIOCBQUEUED) > > + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE); > > + > > if (ret < 0) { > > if (ret == -EIOCBQUEUED) > > f2fs_update_iostat(sbi, APP_DIRECT_IO,
On Thu, Jul 22, 2021 at 01:47:39PM -0700, Jaegeuk Kim wrote: > On 07/19, Christoph Hellwig wrote: > > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote: > > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > > > + struct bio *bio, loff_t file_offset) > > > +{ > > > + struct f2fs_private_dio *dio; > > > + bool write = (bio_op(bio) == REQ_OP_WRITE); > > > + > > > + dio = f2fs_kzalloc(F2FS_I_SB(inode), > > > + sizeof(struct f2fs_private_dio), GFP_NOFS); > > > + if (!dio) > > > + goto out; > > > + > > > + dio->inode = inode; > > > + dio->orig_end_io = bio->bi_end_io; > > > + dio->orig_private = bio->bi_private; > > > + dio->write = write; > > > + > > > + bio->bi_end_io = f2fs_dio_end_io; > > > + bio->bi_private = dio; > > > + > > > + inc_page_count(F2FS_I_SB(inode), > > > + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > > + > > > + return submit_bio(bio); > > > > I don't think there is any need for this mess. The F2FS_DIO_WRITE / > > F2FS_DIO_READ counts are only used to check if there is any inflight > > I/O at all. So instead we can increment them once before calling > > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop > > exit from iomap_dio_rw. Untested patch below. Note that all this > > would be much simpler to review if the last three patches were folded > > into a single one. > > Eric, wdyt? > > I've merged v1 to v5, including Christoph's comment in v2. > I am planning to do this, but I got caught up by the patch "f2fs: fix wrong inflight page stats for directIO" that was recently added to f2fs.git#dev, which makes this suggestion no longer viable. Hence my review comment on that patch (https://lkml.kernel.org/r/YPjNGoFzQojO5Amr@sol.localdomain) and Chao's new version of that patch (https://lkml.kernel.org/r/20210722131617.749204-1-chao@kernel.org), although the new version has some issues too as I commented. If you could just revert "f2fs: fix wrong inflight page stats for directIO" for now, that would be helpful, as I don't think we want it. - Eric
On 07/22, Eric Biggers wrote: > On Thu, Jul 22, 2021 at 01:47:39PM -0700, Jaegeuk Kim wrote: > > On 07/19, Christoph Hellwig wrote: > > > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote: > > > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > > > > + struct bio *bio, loff_t file_offset) > > > > +{ > > > > + struct f2fs_private_dio *dio; > > > > + bool write = (bio_op(bio) == REQ_OP_WRITE); > > > > + > > > > + dio = f2fs_kzalloc(F2FS_I_SB(inode), > > > > + sizeof(struct f2fs_private_dio), GFP_NOFS); > > > > + if (!dio) > > > > + goto out; > > > > + > > > > + dio->inode = inode; > > > > + dio->orig_end_io = bio->bi_end_io; > > > > + dio->orig_private = bio->bi_private; > > > > + dio->write = write; > > > > + > > > > + bio->bi_end_io = f2fs_dio_end_io; > > > > + bio->bi_private = dio; > > > > + > > > > + inc_page_count(F2FS_I_SB(inode), > > > > + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > > > + > > > > + return submit_bio(bio); > > > > > > I don't think there is any need for this mess. The F2FS_DIO_WRITE / > > > F2FS_DIO_READ counts are only used to check if there is any inflight > > > I/O at all. So instead we can increment them once before calling > > > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop > > > exit from iomap_dio_rw. Untested patch below. Note that all this > > > would be much simpler to review if the last three patches were folded > > > into a single one. > > > > Eric, wdyt? > > > > I've merged v1 to v5, including Christoph's comment in v2. > > > > I am planning to do this, but I got caught up by the patch > "f2fs: fix wrong inflight page stats for directIO" that was recently added to > f2fs.git#dev, which makes this suggestion no longer viable. Hence my review > comment on that patch > (https://lkml.kernel.org/r/YPjNGoFzQojO5Amr@sol.localdomain) > and Chao's new version of that patch > (https://lkml.kernel.org/r/20210722131617.749204-1-chao@kernel.org), > although the new version has some issues too as I commented. > > If you could just revert "f2fs: fix wrong inflight page stats for directIO" > for now, that would be helpful, as I don't think we want it. Yup, I dropped it in dev branch, and wait for Chao's next patch on top of iomap. > > - Eric
Hi Christoph, On Mon, Jul 19, 2021 at 10:59:10AM +0200, Christoph Hellwig wrote: > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote: > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, > > + struct bio *bio, loff_t file_offset) > > +{ > > + struct f2fs_private_dio *dio; > > + bool write = (bio_op(bio) == REQ_OP_WRITE); > > + > > + dio = f2fs_kzalloc(F2FS_I_SB(inode), > > + sizeof(struct f2fs_private_dio), GFP_NOFS); > > + if (!dio) > > + goto out; > > + > > + dio->inode = inode; > > + dio->orig_end_io = bio->bi_end_io; > > + dio->orig_private = bio->bi_private; > > + dio->write = write; > > + > > + bio->bi_end_io = f2fs_dio_end_io; > > + bio->bi_private = dio; > > + > > + inc_page_count(F2FS_I_SB(inode), > > + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); > > + > > + return submit_bio(bio); > > I don't think there is any need for this mess. The F2FS_DIO_WRITE / > F2FS_DIO_READ counts are only used to check if there is any inflight > I/O at all. So instead we can increment them once before calling > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop > exit from iomap_dio_rw. Untested patch below. Note that all this > would be much simpler to review if the last three patches were folded > into a single one. > I am trying to do this, but unfortunately I don't see a way to make it work correctly in all cases. The main problem is that when iomap_dio_rw() returns an error (other than -EIOCBQUEUED), there is no way to know whether ->end_io() has been called or not. This is because iomap_dio_rw() can fail either early, before "starting" the I/O (in which case ->end_io() won't have been called), or later, after "starting" the I/O (in which case ->end_io() will have been called). Note that this can't be worked around by checking whether the iov_iter has been advanced or not, since a failure could occur between "starting" the I/O and the iov_iter being advanced for the first time. Would you be receptive to adding a ->begin_io() callback to struct iomap_dio_ops in order to allow filesystems to maintain counters like this? Either way, given the problem here, I think I should leave this out of the initial conversion and just do a dumb translation of the existing f2fs logic to start with, like I have in this patch. - Eric
On Thu, Jul 22, 2021 at 06:52:33PM -0700, Eric Biggers wrote: > I am trying to do this, but unfortunately I don't see a way to make it work > correctly in all cases. > > The main problem is that when iomap_dio_rw() returns an error (other than > -EIOCBQUEUED), there is no way to know whether ->end_io() has been called or > not. This is because iomap_dio_rw() can fail either early, before "starting" > the I/O (in which case ->end_io() won't have been called), or later, after > "starting" the I/O (in which case ->end_io() will have been called). Note that > this can't be worked around by checking whether the iov_iter has been advanced > or not, since a failure could occur between "starting" the I/O and the iov_iter > being advanced for the first time. > > Would you be receptive to adding a ->begin_io() callback to struct iomap_dio_ops > in order to allow filesystems to maintain counters like this? I think we can triviall fix this by using the slightly lower level __iomap_dio_rw API. Incremental patch to my previous one below: diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4fed90cc1462..11844bd0cb7a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4243,6 +4243,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) struct f2fs_inode_info *fi = F2FS_I(inode); const loff_t pos = iocb->ki_pos; const size_t count = iov_iter_count(to); + struct iomap_dio *dio; ssize_t ret; if (count == 0) @@ -4260,8 +4261,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) } inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); - ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); - + dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); + if (IS_ERR_OR_NULL(dio)) { + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); + ret = PTR_ERR_OR_ZERO(dio); + } else { + ret = iomap_dio_complete(dio); + } up_read(&fi->i_gc_rwsem[READ]); file_accessed(file); @@ -4271,8 +4277,6 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) else if (ret == -EIOCBQUEUED) f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO, count - iov_iter_count(to)); - else - dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); out: trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); return ret;
On Fri, Jul 23, 2021 at 06:00:03AM +0100, Christoph Hellwig wrote: > On Thu, Jul 22, 2021 at 06:52:33PM -0700, Eric Biggers wrote: > > I am trying to do this, but unfortunately I don't see a way to make it work > > correctly in all cases. > > > > The main problem is that when iomap_dio_rw() returns an error (other than > > -EIOCBQUEUED), there is no way to know whether ->end_io() has been called or > > not. This is because iomap_dio_rw() can fail either early, before "starting" > > the I/O (in which case ->end_io() won't have been called), or later, after > > "starting" the I/O (in which case ->end_io() will have been called). Note that > > this can't be worked around by checking whether the iov_iter has been advanced > > or not, since a failure could occur between "starting" the I/O and the iov_iter > > being advanced for the first time. > > > > Would you be receptive to adding a ->begin_io() callback to struct iomap_dio_ops > > in order to allow filesystems to maintain counters like this? > > I think we can triviall fix this by using the slightly lower level > __iomap_dio_rw API. Incremental patch to my previous one below: > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 4fed90cc1462..11844bd0cb7a 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4243,6 +4243,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > struct f2fs_inode_info *fi = F2FS_I(inode); > const loff_t pos = iocb->ki_pos; > const size_t count = iov_iter_count(to); > + struct iomap_dio *dio; > ssize_t ret; > > if (count == 0) > @@ -4260,8 +4261,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > } > > inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > - ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); > - > + dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0); > + if (IS_ERR_OR_NULL(dio)) { > + dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > + ret = PTR_ERR_OR_ZERO(dio); > + } else { > + ret = iomap_dio_complete(dio); > + } > up_read(&fi->i_gc_rwsem[READ]); > > file_accessed(file); > @@ -4271,8 +4277,6 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) > else if (ret == -EIOCBQUEUED) > f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO, > count - iov_iter_count(to)); > - else > - dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ); > out: > trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); > return ret; I wouldn't call it trivial, but yes that seems to work (after fixing it to handle EIOCBQUEUED correctly). Take a look at the v2 I've sent out. Thanks! - Eric
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 7669de7b49ce..031fbb596450 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -7,6 +7,7 @@ config F2FS_FS select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION + select FS_IOMAP select LZ4_COMPRESS if F2FS_FS_LZ4 select LZ4_DECOMPRESS if F2FS_FS_LZ4 select LZ4HC_COMPRESS if F2FS_FS_LZ4HC diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdadaa9daf55..9243159ee753 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -21,6 +21,7 @@ #include <linux/cleancache.h> #include <linux/sched/signal.h> #include <linux/fiemap.h> +#include <linux/iomap.h> #include "f2fs.h" #include "node.h" @@ -3452,7 +3453,7 @@ static void f2fs_dio_end_io(struct bio *bio) bio_endio(bio); } -static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, +static void f2fs_dio_submit_bio_old(struct bio *bio, struct inode *inode, loff_t file_offset) { struct f2fs_private_dio *dio; @@ -3481,6 +3482,35 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, bio_endio(bio); } +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap, + struct bio *bio, loff_t file_offset) +{ + struct f2fs_private_dio *dio; + bool write = (bio_op(bio) == REQ_OP_WRITE); + + dio = f2fs_kzalloc(F2FS_I_SB(inode), + sizeof(struct f2fs_private_dio), GFP_NOFS); + if (!dio) + goto out; + + dio->inode = inode; + dio->orig_end_io = bio->bi_end_io; + dio->orig_private = bio->bi_private; + dio->write = write; + + bio->bi_end_io = f2fs_dio_end_io; + bio->bi_private = dio; + + inc_page_count(F2FS_I_SB(inode), + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + return submit_bio(bio); +out: + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_QC_T_NONE; +} + static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -3529,7 +3559,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, rw == WRITE ? get_data_block_dio_write : - get_data_block_dio, NULL, f2fs_dio_submit_bio, + get_data_block_dio, NULL, f2fs_dio_submit_bio_old, rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES : DIO_SKIP_HOLES); @@ -4101,3 +4131,64 @@ void f2fs_destroy_bio_entry_cache(void) { kmem_cache_destroy(bio_entry_slab); } + +static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + struct f2fs_map_blocks map = {}; + pgoff_t next_pgofs = 0; + int err; + + map.m_lblk = bytes_to_blks(inode, offset); + map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; + map.m_next_pgofs = &next_pgofs; + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + if (flags & IOMAP_WRITE) + map.m_may_create = true; + + err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE, + F2FS_GET_BLOCK_DIO); + if (err) + return err; + + iomap->offset = blks_to_bytes(inode, map.m_lblk); + + if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) { + iomap->length = blks_to_bytes(inode, map.m_len); + if (map.m_flags & F2FS_MAP_MAPPED) { + iomap->type = IOMAP_MAPPED; + iomap->flags |= IOMAP_F_MERGED; + } else { + iomap->type = IOMAP_UNWRITTEN; + } + if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk))) + return -EINVAL; + iomap->addr = blks_to_bytes(inode, map.m_pblk); + + if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode)))) + return -EINVAL; + iomap->bdev = inode->i_sb->s_bdev; + } else { + iomap->length = blks_to_bytes(inode, next_pgofs) - + iomap->offset; + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + } + + if (map.m_flags & F2FS_MAP_NEW) + iomap->flags |= IOMAP_F_NEW; + if ((inode->i_state & I_DIRTY_DATASYNC) || + offset + length > i_size_read(inode)) + iomap->flags |= IOMAP_F_DIRTY; + + return 0; +} + +const struct iomap_ops f2fs_iomap_ops = { + .iomap_begin = f2fs_iomap_begin, +}; + +const struct iomap_dio_ops f2fs_iomap_dio_ops = { + .submit_io = f2fs_dio_submit_bio, +}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da1da3111f18..d2b1ef6976c4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3639,6 +3639,8 @@ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); +extern const struct iomap_ops f2fs_iomap_ops; +extern const struct iomap_dio_ops f2fs_iomap_dio_ops; /* * gc.c