diff mbox series

[6/9] f2fs: implement iomap operations

Message ID 20210716143919.44373-7-ebiggers@kernel.org (mailing list archive)
State New, archived
Headers show
Series f2fs: use iomap for direct I/O | expand

Commit Message

Eric Biggers July 16, 2021, 2:39 p.m. UTC
From: Eric Biggers <ebiggers@google.com>

Implement 'struct iomap_ops' and 'struct iomap_dio_ops' for f2fs, in
preparation for making f2fs use iomap for direct I/O.

Note that f2fs_iomap_ops may be used for other things besides direct I/O
in the future; however, for now I've only tested it for direct I/O.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/f2fs/Kconfig |  1 +
 fs/f2fs/data.c  | 95 +++++++++++++++++++++++++++++++++++++++++++++++--
 fs/f2fs/f2fs.h  |  2 ++
 3 files changed, 96 insertions(+), 2 deletions(-)

Comments

Christoph Hellwig July 19, 2021, 8:59 a.m. UTC | #1
On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote:
> +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> +				    struct bio *bio, loff_t file_offset)
> +{
> +	struct f2fs_private_dio *dio;
> +	bool write = (bio_op(bio) == REQ_OP_WRITE);
> +
> +	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> +			sizeof(struct f2fs_private_dio), GFP_NOFS);
> +	if (!dio)
> +		goto out;
> +
> +	dio->inode = inode;
> +	dio->orig_end_io = bio->bi_end_io;
> +	dio->orig_private = bio->bi_private;
> +	dio->write = write;
> +
> +	bio->bi_end_io = f2fs_dio_end_io;
> +	bio->bi_private = dio;
> +
> +	inc_page_count(F2FS_I_SB(inode),
> +			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> +
> +	return submit_bio(bio);

I don't think there is any need for this mess.  The F2FS_DIO_WRITE /
F2FS_DIO_READ counts are only used to check if there is any inflight
I/O at all.  So instead we can increment them once before calling
iomap_dio_rw, and decrement them in ->end_io or for a failure/noop
exit from iomap_dio_rw.  Untested patch below.  Note that all this
would be much simpler to review if the last three patches were folded
into a single one.

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 4fbf28f5aaab..9f9cc49fbe94 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3369,50 +3369,6 @@ static int f2fs_write_end(struct file *file,
 	return copied;
 }
 
-static void f2fs_dio_end_io(struct bio *bio)
-{
-	struct f2fs_private_dio *dio = bio->bi_private;
-
-	dec_page_count(F2FS_I_SB(dio->inode),
-			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
-	bio->bi_private = dio->orig_private;
-	bio->bi_end_io = dio->orig_end_io;
-
-	kfree(dio);
-
-	bio_endio(bio);
-}
-
-static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
-				    struct bio *bio, loff_t file_offset)
-{
-	struct f2fs_private_dio *dio;
-	bool write = (bio_op(bio) == REQ_OP_WRITE);
-
-	dio = f2fs_kzalloc(F2FS_I_SB(inode),
-			sizeof(struct f2fs_private_dio), GFP_NOFS);
-	if (!dio)
-		goto out;
-
-	dio->inode = inode;
-	dio->orig_end_io = bio->bi_end_io;
-	dio->orig_private = bio->bi_private;
-	dio->write = write;
-
-	bio->bi_end_io = f2fs_dio_end_io;
-	bio->bi_private = dio;
-
-	inc_page_count(F2FS_I_SB(inode),
-			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
-	return submit_bio(bio);
-out:
-	bio->bi_status = BLK_STS_IOERR;
-	bio_endio(bio);
-	return BLK_QC_T_NONE;
-}
-
 void f2fs_invalidate_page(struct page *page, unsigned int offset,
 							unsigned int length)
 {
@@ -4006,6 +3962,18 @@ const struct iomap_ops f2fs_iomap_ops = {
 	.iomap_begin	= f2fs_iomap_begin,
 };
 
+static int f2fs_dio_end_io(struct kiocb *iocb, ssize_t size, int error,
+		unsigned flags)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+	if (iocb->ki_flags & IOCB_WRITE)
+		dec_page_count(sbi, F2FS_DIO_WRITE);
+	else
+		dec_page_count(sbi, F2FS_DIO_READ);
+	return 0;
+}
+
 const struct iomap_dio_ops f2fs_iomap_dio_ops = {
-	.submit_io	= f2fs_dio_submit_bio,
+	.end_io		= f2fs_dio_end_io,
 };
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6dbbac05a15c..abd521dc504a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1750,13 +1750,6 @@ struct f2fs_sb_info {
 #endif
 };
 
-struct f2fs_private_dio {
-	struct inode *inode;
-	void *orig_private;
-	bio_end_io_t *orig_end_io;
-	bool write;
-};
-
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 #define f2fs_show_injection_info(sbi, type)					\
 	printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n",	\
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6b8eac6b25d4..4fed90cc1462 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4259,6 +4259,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		down_read(&fi->i_gc_rwsem[READ]);
 	}
 
+	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
 	ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
 
 	up_read(&fi->i_gc_rwsem[READ]);
@@ -4270,6 +4271,8 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	else if (ret == -EIOCBQUEUED)
 		f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
 				   count - iov_iter_count(to));
+	else
+		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
 out:
 	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
 	return ret;
@@ -4446,6 +4449,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 
 	if (pos + count > inode->i_size)
 		dio_flags |= IOMAP_DIO_FORCE_WAIT;
+	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE);
 	ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops,
 			   dio_flags);
 	if (ret == -ENOTBLK)
@@ -4459,6 +4463,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 
 	up_read(&fi->i_gc_rwsem[WRITE]);
 
+	if (ret <= 0 && ret != -EIOCBQUEUED)
+		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE);
+
 	if (ret < 0) {
 		if (ret == -EIOCBQUEUED)
 			f2fs_update_iostat(sbi, APP_DIRECT_IO,
Jaegeuk Kim July 22, 2021, 8:47 p.m. UTC | #2
On 07/19, Christoph Hellwig wrote:
> On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote:
> > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> > +				    struct bio *bio, loff_t file_offset)
> > +{
> > +	struct f2fs_private_dio *dio;
> > +	bool write = (bio_op(bio) == REQ_OP_WRITE);
> > +
> > +	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> > +			sizeof(struct f2fs_private_dio), GFP_NOFS);
> > +	if (!dio)
> > +		goto out;
> > +
> > +	dio->inode = inode;
> > +	dio->orig_end_io = bio->bi_end_io;
> > +	dio->orig_private = bio->bi_private;
> > +	dio->write = write;
> > +
> > +	bio->bi_end_io = f2fs_dio_end_io;
> > +	bio->bi_private = dio;
> > +
> > +	inc_page_count(F2FS_I_SB(inode),
> > +			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > +
> > +	return submit_bio(bio);
> 
> I don't think there is any need for this mess.  The F2FS_DIO_WRITE /
> F2FS_DIO_READ counts are only used to check if there is any inflight
> I/O at all.  So instead we can increment them once before calling
> iomap_dio_rw, and decrement them in ->end_io or for a failure/noop
> exit from iomap_dio_rw.  Untested patch below.  Note that all this
> would be much simpler to review if the last three patches were folded
> into a single one.

Eric, wdyt?

I've merged v1 to v5, including Christoph's comment in v2.

> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 4fbf28f5aaab..9f9cc49fbe94 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3369,50 +3369,6 @@ static int f2fs_write_end(struct file *file,
>  	return copied;
>  }
>  
> -static void f2fs_dio_end_io(struct bio *bio)
> -{
> -	struct f2fs_private_dio *dio = bio->bi_private;
> -
> -	dec_page_count(F2FS_I_SB(dio->inode),
> -			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> -
> -	bio->bi_private = dio->orig_private;
> -	bio->bi_end_io = dio->orig_end_io;
> -
> -	kfree(dio);
> -
> -	bio_endio(bio);
> -}
> -
> -static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> -				    struct bio *bio, loff_t file_offset)
> -{
> -	struct f2fs_private_dio *dio;
> -	bool write = (bio_op(bio) == REQ_OP_WRITE);
> -
> -	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> -			sizeof(struct f2fs_private_dio), GFP_NOFS);
> -	if (!dio)
> -		goto out;
> -
> -	dio->inode = inode;
> -	dio->orig_end_io = bio->bi_end_io;
> -	dio->orig_private = bio->bi_private;
> -	dio->write = write;
> -
> -	bio->bi_end_io = f2fs_dio_end_io;
> -	bio->bi_private = dio;
> -
> -	inc_page_count(F2FS_I_SB(inode),
> -			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> -
> -	return submit_bio(bio);
> -out:
> -	bio->bi_status = BLK_STS_IOERR;
> -	bio_endio(bio);
> -	return BLK_QC_T_NONE;
> -}
> -
>  void f2fs_invalidate_page(struct page *page, unsigned int offset,
>  							unsigned int length)
>  {
> @@ -4006,6 +3962,18 @@ const struct iomap_ops f2fs_iomap_ops = {
>  	.iomap_begin	= f2fs_iomap_begin,
>  };
>  
> +static int f2fs_dio_end_io(struct kiocb *iocb, ssize_t size, int error,
> +		unsigned flags)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
> +
> +	if (iocb->ki_flags & IOCB_WRITE)
> +		dec_page_count(sbi, F2FS_DIO_WRITE);
> +	else
> +		dec_page_count(sbi, F2FS_DIO_READ);
> +	return 0;
> +}
> +
>  const struct iomap_dio_ops f2fs_iomap_dio_ops = {
> -	.submit_io	= f2fs_dio_submit_bio,
> +	.end_io		= f2fs_dio_end_io,
>  };
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 6dbbac05a15c..abd521dc504a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1750,13 +1750,6 @@ struct f2fs_sb_info {
>  #endif
>  };
>  
> -struct f2fs_private_dio {
> -	struct inode *inode;
> -	void *orig_private;
> -	bio_end_io_t *orig_end_io;
> -	bool write;
> -};
> -
>  #ifdef CONFIG_F2FS_FAULT_INJECTION
>  #define f2fs_show_injection_info(sbi, type)					\
>  	printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n",	\
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 6b8eac6b25d4..4fed90cc1462 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4259,6 +4259,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  		down_read(&fi->i_gc_rwsem[READ]);
>  	}
>  
> +	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
>  	ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
>  
>  	up_read(&fi->i_gc_rwsem[READ]);
> @@ -4270,6 +4271,8 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	else if (ret == -EIOCBQUEUED)
>  		f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
>  				   count - iov_iter_count(to));
> +	else
> +		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
>  out:
>  	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
>  	return ret;
> @@ -4446,6 +4449,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
>  
>  	if (pos + count > inode->i_size)
>  		dio_flags |= IOMAP_DIO_FORCE_WAIT;
> +	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE);
>  	ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops,
>  			   dio_flags);
>  	if (ret == -ENOTBLK)
> @@ -4459,6 +4463,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
>  
>  	up_read(&fi->i_gc_rwsem[WRITE]);
>  
> +	if (ret <= 0 && ret != -EIOCBQUEUED)
> +		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE);
> +
>  	if (ret < 0) {
>  		if (ret == -EIOCBQUEUED)
>  			f2fs_update_iostat(sbi, APP_DIRECT_IO,
Jaegeuk Kim July 22, 2021, 8:49 p.m. UTC | #3
On 07/22, Jaegeuk Kim wrote:
> On 07/19, Christoph Hellwig wrote:
> > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote:
> > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> > > +				    struct bio *bio, loff_t file_offset)
> > > +{
> > > +	struct f2fs_private_dio *dio;
> > > +	bool write = (bio_op(bio) == REQ_OP_WRITE);
> > > +
> > > +	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> > > +			sizeof(struct f2fs_private_dio), GFP_NOFS);
> > > +	if (!dio)
> > > +		goto out;
> > > +
> > > +	dio->inode = inode;
> > > +	dio->orig_end_io = bio->bi_end_io;
> > > +	dio->orig_private = bio->bi_private;
> > > +	dio->write = write;
> > > +
> > > +	bio->bi_end_io = f2fs_dio_end_io;
> > > +	bio->bi_private = dio;
> > > +
> > > +	inc_page_count(F2FS_I_SB(inode),
> > > +			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > > +
> > > +	return submit_bio(bio);
> > 
> > I don't think there is any need for this mess.  The F2FS_DIO_WRITE /
> > F2FS_DIO_READ counts are only used to check if there is any inflight
> > I/O at all.  So instead we can increment them once before calling
> > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop
> > exit from iomap_dio_rw.  Untested patch below.  Note that all this
> > would be much simpler to review if the last three patches were folded
> > into a single one.
> 
> Eric, wdyt?
> 
> I've merged v1 to v5, including Christoph's comment in v2.

Sorry, I mean patch #1 to #5. You can find them in:
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/log/?h=dev

> 
> > 
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 4fbf28f5aaab..9f9cc49fbe94 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -3369,50 +3369,6 @@ static int f2fs_write_end(struct file *file,
> >  	return copied;
> >  }
> >  
> > -static void f2fs_dio_end_io(struct bio *bio)
> > -{
> > -	struct f2fs_private_dio *dio = bio->bi_private;
> > -
> > -	dec_page_count(F2FS_I_SB(dio->inode),
> > -			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > -
> > -	bio->bi_private = dio->orig_private;
> > -	bio->bi_end_io = dio->orig_end_io;
> > -
> > -	kfree(dio);
> > -
> > -	bio_endio(bio);
> > -}
> > -
> > -static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> > -				    struct bio *bio, loff_t file_offset)
> > -{
> > -	struct f2fs_private_dio *dio;
> > -	bool write = (bio_op(bio) == REQ_OP_WRITE);
> > -
> > -	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> > -			sizeof(struct f2fs_private_dio), GFP_NOFS);
> > -	if (!dio)
> > -		goto out;
> > -
> > -	dio->inode = inode;
> > -	dio->orig_end_io = bio->bi_end_io;
> > -	dio->orig_private = bio->bi_private;
> > -	dio->write = write;
> > -
> > -	bio->bi_end_io = f2fs_dio_end_io;
> > -	bio->bi_private = dio;
> > -
> > -	inc_page_count(F2FS_I_SB(inode),
> > -			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > -
> > -	return submit_bio(bio);
> > -out:
> > -	bio->bi_status = BLK_STS_IOERR;
> > -	bio_endio(bio);
> > -	return BLK_QC_T_NONE;
> > -}
> > -
> >  void f2fs_invalidate_page(struct page *page, unsigned int offset,
> >  							unsigned int length)
> >  {
> > @@ -4006,6 +3962,18 @@ const struct iomap_ops f2fs_iomap_ops = {
> >  	.iomap_begin	= f2fs_iomap_begin,
> >  };
> >  
> > +static int f2fs_dio_end_io(struct kiocb *iocb, ssize_t size, int error,
> > +		unsigned flags)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
> > +
> > +	if (iocb->ki_flags & IOCB_WRITE)
> > +		dec_page_count(sbi, F2FS_DIO_WRITE);
> > +	else
> > +		dec_page_count(sbi, F2FS_DIO_READ);
> > +	return 0;
> > +}
> > +
> >  const struct iomap_dio_ops f2fs_iomap_dio_ops = {
> > -	.submit_io	= f2fs_dio_submit_bio,
> > +	.end_io		= f2fs_dio_end_io,
> >  };
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 6dbbac05a15c..abd521dc504a 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1750,13 +1750,6 @@ struct f2fs_sb_info {
> >  #endif
> >  };
> >  
> > -struct f2fs_private_dio {
> > -	struct inode *inode;
> > -	void *orig_private;
> > -	bio_end_io_t *orig_end_io;
> > -	bool write;
> > -};
> > -
> >  #ifdef CONFIG_F2FS_FAULT_INJECTION
> >  #define f2fs_show_injection_info(sbi, type)					\
> >  	printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n",	\
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 6b8eac6b25d4..4fed90cc1462 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4259,6 +4259,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
> >  		down_read(&fi->i_gc_rwsem[READ]);
> >  	}
> >  
> > +	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
> >  	ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
> >  
> >  	up_read(&fi->i_gc_rwsem[READ]);
> > @@ -4270,6 +4271,8 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
> >  	else if (ret == -EIOCBQUEUED)
> >  		f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
> >  				   count - iov_iter_count(to));
> > +	else
> > +		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
> >  out:
> >  	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
> >  	return ret;
> > @@ -4446,6 +4449,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> >  
> >  	if (pos + count > inode->i_size)
> >  		dio_flags |= IOMAP_DIO_FORCE_WAIT;
> > +	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE);
> >  	ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops,
> >  			   dio_flags);
> >  	if (ret == -ENOTBLK)
> > @@ -4459,6 +4463,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> >  
> >  	up_read(&fi->i_gc_rwsem[WRITE]);
> >  
> > +	if (ret <= 0 && ret != -EIOCBQUEUED)
> > +		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_WRITE);
> > +
> >  	if (ret < 0) {
> >  		if (ret == -EIOCBQUEUED)
> >  			f2fs_update_iostat(sbi, APP_DIRECT_IO,
Eric Biggers July 22, 2021, 8:54 p.m. UTC | #4
On Thu, Jul 22, 2021 at 01:47:39PM -0700, Jaegeuk Kim wrote:
> On 07/19, Christoph Hellwig wrote:
> > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote:
> > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> > > +				    struct bio *bio, loff_t file_offset)
> > > +{
> > > +	struct f2fs_private_dio *dio;
> > > +	bool write = (bio_op(bio) == REQ_OP_WRITE);
> > > +
> > > +	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> > > +			sizeof(struct f2fs_private_dio), GFP_NOFS);
> > > +	if (!dio)
> > > +		goto out;
> > > +
> > > +	dio->inode = inode;
> > > +	dio->orig_end_io = bio->bi_end_io;
> > > +	dio->orig_private = bio->bi_private;
> > > +	dio->write = write;
> > > +
> > > +	bio->bi_end_io = f2fs_dio_end_io;
> > > +	bio->bi_private = dio;
> > > +
> > > +	inc_page_count(F2FS_I_SB(inode),
> > > +			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > > +
> > > +	return submit_bio(bio);
> > 
> > I don't think there is any need for this mess.  The F2FS_DIO_WRITE /
> > F2FS_DIO_READ counts are only used to check if there is any inflight
> > I/O at all.  So instead we can increment them once before calling
> > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop
> > exit from iomap_dio_rw.  Untested patch below.  Note that all this
> > would be much simpler to review if the last three patches were folded
> > into a single one.
> 
> Eric, wdyt?
> 
> I've merged v1 to v5, including Christoph's comment in v2.
> 

I am planning to do this, but I got caught up by the patch
"f2fs: fix wrong inflight page stats for directIO" that was recently added to
f2fs.git#dev, which makes this suggestion no longer viable.  Hence my review
comment on that patch
(https://lkml.kernel.org/r/YPjNGoFzQojO5Amr@sol.localdomain)
and Chao's new version of that patch
(https://lkml.kernel.org/r/20210722131617.749204-1-chao@kernel.org),
although the new version has some issues too as I commented.

If you could just revert "f2fs: fix wrong inflight page stats for directIO"
for now, that would be helpful, as I don't think we want it.

- Eric
Jaegeuk Kim July 22, 2021, 9:57 p.m. UTC | #5
On 07/22, Eric Biggers wrote:
> On Thu, Jul 22, 2021 at 01:47:39PM -0700, Jaegeuk Kim wrote:
> > On 07/19, Christoph Hellwig wrote:
> > > On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote:
> > > > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> > > > +				    struct bio *bio, loff_t file_offset)
> > > > +{
> > > > +	struct f2fs_private_dio *dio;
> > > > +	bool write = (bio_op(bio) == REQ_OP_WRITE);
> > > > +
> > > > +	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> > > > +			sizeof(struct f2fs_private_dio), GFP_NOFS);
> > > > +	if (!dio)
> > > > +		goto out;
> > > > +
> > > > +	dio->inode = inode;
> > > > +	dio->orig_end_io = bio->bi_end_io;
> > > > +	dio->orig_private = bio->bi_private;
> > > > +	dio->write = write;
> > > > +
> > > > +	bio->bi_end_io = f2fs_dio_end_io;
> > > > +	bio->bi_private = dio;
> > > > +
> > > > +	inc_page_count(F2FS_I_SB(inode),
> > > > +			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > > > +
> > > > +	return submit_bio(bio);
> > > 
> > > I don't think there is any need for this mess.  The F2FS_DIO_WRITE /
> > > F2FS_DIO_READ counts are only used to check if there is any inflight
> > > I/O at all.  So instead we can increment them once before calling
> > > iomap_dio_rw, and decrement them in ->end_io or for a failure/noop
> > > exit from iomap_dio_rw.  Untested patch below.  Note that all this
> > > would be much simpler to review if the last three patches were folded
> > > into a single one.
> > 
> > Eric, wdyt?
> > 
> > I've merged v1 to v5, including Christoph's comment in v2.
> > 
> 
> I am planning to do this, but I got caught up by the patch
> "f2fs: fix wrong inflight page stats for directIO" that was recently added to
> f2fs.git#dev, which makes this suggestion no longer viable.  Hence my review
> comment on that patch
> (https://lkml.kernel.org/r/YPjNGoFzQojO5Amr@sol.localdomain)
> and Chao's new version of that patch
> (https://lkml.kernel.org/r/20210722131617.749204-1-chao@kernel.org),
> although the new version has some issues too as I commented.
> 
> If you could just revert "f2fs: fix wrong inflight page stats for directIO"
> for now, that would be helpful, as I don't think we want it.

Yup, I dropped it in dev branch, and wait for Chao's next patch on top of
iomap.

> 
> - Eric
Eric Biggers July 23, 2021, 1:52 a.m. UTC | #6
Hi Christoph,

On Mon, Jul 19, 2021 at 10:59:10AM +0200, Christoph Hellwig wrote:
> On Fri, Jul 16, 2021 at 09:39:16AM -0500, Eric Biggers wrote:
> > +static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
> > +				    struct bio *bio, loff_t file_offset)
> > +{
> > +	struct f2fs_private_dio *dio;
> > +	bool write = (bio_op(bio) == REQ_OP_WRITE);
> > +
> > +	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> > +			sizeof(struct f2fs_private_dio), GFP_NOFS);
> > +	if (!dio)
> > +		goto out;
> > +
> > +	dio->inode = inode;
> > +	dio->orig_end_io = bio->bi_end_io;
> > +	dio->orig_private = bio->bi_private;
> > +	dio->write = write;
> > +
> > +	bio->bi_end_io = f2fs_dio_end_io;
> > +	bio->bi_private = dio;
> > +
> > +	inc_page_count(F2FS_I_SB(inode),
> > +			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> > +
> > +	return submit_bio(bio);
> 
> I don't think there is any need for this mess.  The F2FS_DIO_WRITE /
> F2FS_DIO_READ counts are only used to check if there is any inflight
> I/O at all.  So instead we can increment them once before calling
> iomap_dio_rw, and decrement them in ->end_io or for a failure/noop
> exit from iomap_dio_rw.  Untested patch below.  Note that all this
> would be much simpler to review if the last three patches were folded
> into a single one.
> 

I am trying to do this, but unfortunately I don't see a way to make it work
correctly in all cases.

The main problem is that when iomap_dio_rw() returns an error (other than
-EIOCBQUEUED), there is no way to know whether ->end_io() has been called or
not.  This is because iomap_dio_rw() can fail either early, before "starting"
the I/O (in which case ->end_io() won't have been called), or later, after
"starting" the I/O (in which case ->end_io() will have been called).  Note that
this can't be worked around by checking whether the iov_iter has been advanced
or not, since a failure could occur between "starting" the I/O and the iov_iter
being advanced for the first time.

Would you be receptive to adding a ->begin_io() callback to struct iomap_dio_ops
in order to allow filesystems to maintain counters like this?

Either way, given the problem here, I think I should leave this out of the
initial conversion and just do a dumb translation of the existing f2fs logic to
start with, like I have in this patch.

- Eric
Christoph Hellwig July 23, 2021, 5 a.m. UTC | #7
On Thu, Jul 22, 2021 at 06:52:33PM -0700, Eric Biggers wrote:
> I am trying to do this, but unfortunately I don't see a way to make it work
> correctly in all cases.
> 
> The main problem is that when iomap_dio_rw() returns an error (other than
> -EIOCBQUEUED), there is no way to know whether ->end_io() has been called or
> not.  This is because iomap_dio_rw() can fail either early, before "starting"
> the I/O (in which case ->end_io() won't have been called), or later, after
> "starting" the I/O (in which case ->end_io() will have been called).  Note that
> this can't be worked around by checking whether the iov_iter has been advanced
> or not, since a failure could occur between "starting" the I/O and the iov_iter
> being advanced for the first time.
> 
> Would you be receptive to adding a ->begin_io() callback to struct iomap_dio_ops
> in order to allow filesystems to maintain counters like this?

I think we can triviall fix this by using the slightly lower level
__iomap_dio_rw API.  Incremental patch to my previous one below:

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 4fed90cc1462..11844bd0cb7a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4243,6 +4243,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	const loff_t pos = iocb->ki_pos;
 	const size_t count = iov_iter_count(to);
+	struct iomap_dio *dio;
 	ssize_t ret;
 
 	if (count == 0)
@@ -4260,8 +4261,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	}
 
 	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
-	ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
-
+	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
+	if (IS_ERR_OR_NULL(dio)) {
+		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
+		ret = PTR_ERR_OR_ZERO(dio);
+	} else {
+		ret = iomap_dio_complete(dio);
+	}
 	up_read(&fi->i_gc_rwsem[READ]);
 
 	file_accessed(file);
@@ -4271,8 +4277,6 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	else if (ret == -EIOCBQUEUED)
 		f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
 				   count - iov_iter_count(to));
-	else
-		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
 out:
 	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
 	return ret;
Eric Biggers July 23, 2021, 8:05 a.m. UTC | #8
On Fri, Jul 23, 2021 at 06:00:03AM +0100, Christoph Hellwig wrote:
> On Thu, Jul 22, 2021 at 06:52:33PM -0700, Eric Biggers wrote:
> > I am trying to do this, but unfortunately I don't see a way to make it work
> > correctly in all cases.
> > 
> > The main problem is that when iomap_dio_rw() returns an error (other than
> > -EIOCBQUEUED), there is no way to know whether ->end_io() has been called or
> > not.  This is because iomap_dio_rw() can fail either early, before "starting"
> > the I/O (in which case ->end_io() won't have been called), or later, after
> > "starting" the I/O (in which case ->end_io() will have been called).  Note that
> > this can't be worked around by checking whether the iov_iter has been advanced
> > or not, since a failure could occur between "starting" the I/O and the iov_iter
> > being advanced for the first time.
> > 
> > Would you be receptive to adding a ->begin_io() callback to struct iomap_dio_ops
> > in order to allow filesystems to maintain counters like this?
> 
> I think we can triviall fix this by using the slightly lower level
> __iomap_dio_rw API.  Incremental patch to my previous one below:
> 
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 4fed90cc1462..11844bd0cb7a 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4243,6 +4243,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	struct f2fs_inode_info *fi = F2FS_I(inode);
>  	const loff_t pos = iocb->ki_pos;
>  	const size_t count = iov_iter_count(to);
> +	struct iomap_dio *dio;
>  	ssize_t ret;
>  
>  	if (count == 0)
> @@ -4260,8 +4261,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	}
>  
>  	inc_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
> -	ret = iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
> -
> +	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, 0);
> +	if (IS_ERR_OR_NULL(dio)) {
> +		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
> +		ret = PTR_ERR_OR_ZERO(dio);
> +	} else {
> +		ret = iomap_dio_complete(dio);
> +	}
>  	up_read(&fi->i_gc_rwsem[READ]);
>  
>  	file_accessed(file);
> @@ -4271,8 +4277,6 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	else if (ret == -EIOCBQUEUED)
>  		f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
>  				   count - iov_iter_count(to));
> -	else
> -		dec_page_count(F2FS_I_SB(inode), F2FS_DIO_READ);
>  out:
>  	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
>  	return ret;

I wouldn't call it trivial, but yes that seems to work (after fixing it to
handle EIOCBQUEUED correctly).  Take a look at the v2 I've sent out.  Thanks!

- Eric
diff mbox series

Patch

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 7669de7b49ce..031fbb596450 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -7,6 +7,7 @@  config F2FS_FS
 	select CRYPTO_CRC32
 	select F2FS_FS_XATTR if FS_ENCRYPTION
 	select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+	select FS_IOMAP
 	select LZ4_COMPRESS if F2FS_FS_LZ4
 	select LZ4_DECOMPRESS if F2FS_FS_LZ4
 	select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cdadaa9daf55..9243159ee753 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -21,6 +21,7 @@ 
 #include <linux/cleancache.h>
 #include <linux/sched/signal.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -3452,7 +3453,7 @@  static void f2fs_dio_end_io(struct bio *bio)
 	bio_endio(bio);
 }
 
-static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
+static void f2fs_dio_submit_bio_old(struct bio *bio, struct inode *inode,
 							loff_t file_offset)
 {
 	struct f2fs_private_dio *dio;
@@ -3481,6 +3482,35 @@  static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
 	bio_endio(bio);
 }
 
+static blk_qc_t f2fs_dio_submit_bio(struct inode *inode, struct iomap *iomap,
+				    struct bio *bio, loff_t file_offset)
+{
+	struct f2fs_private_dio *dio;
+	bool write = (bio_op(bio) == REQ_OP_WRITE);
+
+	dio = f2fs_kzalloc(F2FS_I_SB(inode),
+			sizeof(struct f2fs_private_dio), GFP_NOFS);
+	if (!dio)
+		goto out;
+
+	dio->inode = inode;
+	dio->orig_end_io = bio->bi_end_io;
+	dio->orig_private = bio->bi_private;
+	dio->write = write;
+
+	bio->bi_end_io = f2fs_dio_end_io;
+	bio->bi_private = dio;
+
+	inc_page_count(F2FS_I_SB(inode),
+			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
+
+	return submit_bio(bio);
+out:
+	bio->bi_status = BLK_STS_IOERR;
+	bio_endio(bio);
+	return BLK_QC_T_NONE;
+}
+
 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
@@ -3529,7 +3559,7 @@  static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
 			iter, rw == WRITE ? get_data_block_dio_write :
-			get_data_block_dio, NULL, f2fs_dio_submit_bio,
+			get_data_block_dio, NULL, f2fs_dio_submit_bio_old,
 			rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
 			DIO_SKIP_HOLES);
 
@@ -4101,3 +4131,64 @@  void f2fs_destroy_bio_entry_cache(void)
 {
 	kmem_cache_destroy(bio_entry_slab);
 }
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+			    unsigned int flags, struct iomap *iomap,
+			    struct iomap *srcmap)
+{
+	struct f2fs_map_blocks map = {};
+	pgoff_t next_pgofs = 0;
+	int err;
+
+	map.m_lblk = bytes_to_blks(inode, offset);
+	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+	map.m_next_pgofs = &next_pgofs;
+	map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+	if (flags & IOMAP_WRITE)
+		map.m_may_create = true;
+
+	err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+			      F2FS_GET_BLOCK_DIO);
+	if (err)
+		return err;
+
+	iomap->offset = blks_to_bytes(inode, map.m_lblk);
+
+	if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
+		iomap->length = blks_to_bytes(inode, map.m_len);
+		if (map.m_flags & F2FS_MAP_MAPPED) {
+			iomap->type = IOMAP_MAPPED;
+			iomap->flags |= IOMAP_F_MERGED;
+		} else {
+			iomap->type = IOMAP_UNWRITTEN;
+		}
+		if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+			return -EINVAL;
+		iomap->addr = blks_to_bytes(inode, map.m_pblk);
+
+		if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
+			return -EINVAL;
+		iomap->bdev = inode->i_sb->s_bdev;
+	} else {
+		iomap->length = blks_to_bytes(inode, next_pgofs) -
+				iomap->offset;
+		iomap->type = IOMAP_HOLE;
+		iomap->addr = IOMAP_NULL_ADDR;
+	}
+
+	if (map.m_flags & F2FS_MAP_NEW)
+		iomap->flags |= IOMAP_F_NEW;
+	if ((inode->i_state & I_DIRTY_DATASYNC) ||
+	    offset + length > i_size_read(inode))
+		iomap->flags |= IOMAP_F_DIRTY;
+
+	return 0;
+}
+
+const struct iomap_ops f2fs_iomap_ops = {
+	.iomap_begin	= f2fs_iomap_begin,
+};
+
+const struct iomap_dio_ops f2fs_iomap_dio_ops = {
+	.submit_io	= f2fs_dio_submit_bio,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index da1da3111f18..d2b1ef6976c4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3639,6 +3639,8 @@  int f2fs_init_post_read_processing(void);
 void f2fs_destroy_post_read_processing(void);
 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+extern const struct iomap_ops f2fs_iomap_ops;
+extern const struct iomap_dio_ops f2fs_iomap_dio_ops;
 
 /*
  * gc.c