diff mbox series

[f2fs-dev] f2fs: optimize f2fs DIO overwrites

Message ID 20250307145650.568-1-yohan.joung@sk.com (mailing list archive)
State Superseded
Headers show
Series [f2fs-dev] f2fs: optimize f2fs DIO overwrites | expand

Commit Message

Yohan Joung March 7, 2025, 2:56 p.m. UTC
Going through the block-allocation path for every direct I/O write is
unnecessary when we know we are overwriting already allocated blocks,
and the overhead of starting a transaction can be significant,
especially for multithreaded workloads doing small writes.

Signed-off-by: Yohan Joung <yohan.joung@sk.com>
---
 fs/f2fs/data.c | 20 ++++++++++++++++++++
 fs/f2fs/f2fs.h |  1 +
 fs/f2fs/file.c |  5 ++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

Comments

Chao Yu March 11, 2025, noon UTC | #1
On 3/7/25 22:56, Yohan Joung wrote:
> this is unnecessary when we know we are overwriting already allocated
> blocks and the overhead of starting a transaction can be significant
> especially for multithreaded workloads doing small writes.

Hi Yohan,

So you're trying to avoid f2fs_map_lock() in dio write path, right?

Thanks,

> 
> Signed-off-by: Yohan Joung <yohan.joung@sk.com>
> ---
>  fs/f2fs/data.c | 20 ++++++++++++++++++++
>  fs/f2fs/f2fs.h |  1 +
>  fs/f2fs/file.c |  5 ++++-
>  3 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 09437dbd1b42..728630037b74 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  	return 0;
>  }
>  
> +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset,
> +		loff_t length, unsigned flags, struct iomap *iomap,
> +		struct iomap *srcmap)
> +{
> +	int ret;
> +
> +	/*
> +	 * Even for writes we don't need to allocate blocks, so just pretend
> +	 * we are reading to save overhead of starting a transaction.
> +	 */
> +	flags &= ~IOMAP_WRITE;
> +	ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap);
> +	WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
> +	return ret;
> +}
> +
>  const struct iomap_ops f2fs_iomap_ops = {
>  	.iomap_begin	= f2fs_iomap_begin,
>  };
> +
> +const struct iomap_ops f2fs_iomap_overwrite_ops = {
> +	.iomap_begin	= f2fs_iomap_overwrite_begin,
> +};
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index c6cc2694f9ac..0511ab5ed42a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void);
>  int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
>  void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
>  extern const struct iomap_ops f2fs_iomap_ops;
> +extern const struct iomap_ops f2fs_iomap_overwrite_ops;
>  
>  static inline struct page *f2fs_find_data_page(struct inode *inode,
>  		pgoff_t index, pgoff_t *next_pgofs)
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 82b21baf5628..bb2fe6dac9b6 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
>  	const ssize_t count = iov_iter_count(from);
>  	unsigned int dio_flags;
>  	struct iomap_dio *dio;
> +	const struct iomap_ops *iomap_ops = &f2fs_iomap_ops;
>  	ssize_t ret;
>  
>  	trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
> @@ -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
>  	dio_flags = 0;
>  	if (pos + count > inode->i_size)
>  		dio_flags |= IOMAP_DIO_FORCE_WAIT;
> -	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
> +	else if (f2fs_overwrite_io(inode, pos, count))
> +		iomap_ops = &f2fs_iomap_overwrite_ops;
> +	dio = __iomap_dio_rw(iocb, from, iomap_ops,
>  			     &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
>  	if (IS_ERR_OR_NULL(dio)) {
>  		ret = PTR_ERR_OR_ZERO(dio);
Daeho Jeong March 11, 2025, 9:14 p.m. UTC | #2
On Tue, Mar 11, 2025 at 5:00 AM Chao Yu <chao@kernel.org> wrote:
>
> On 3/7/25 22:56, Yohan Joung wrote:
> > this is unnecessary when we know we are overwriting already allocated
> > blocks and the overhead of starting a transaction can be significant
> > especially for multithreaded workloads doing small writes.
>
> Hi Yohan,
>
> So you're trying to avoid f2fs_map_lock() in dio write path, right?
>
> Thanks,
>
> >
> > Signed-off-by: Yohan Joung <yohan.joung@sk.com>
> > ---
> >  fs/f2fs/data.c | 20 ++++++++++++++++++++
> >  fs/f2fs/f2fs.h |  1 +
> >  fs/f2fs/file.c |  5 ++++-
> >  3 files changed, 25 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 09437dbd1b42..728630037b74 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> >       return 0;
> >  }
> >
> > +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset,
> > +             loff_t length, unsigned flags, struct iomap *iomap,
> > +             struct iomap *srcmap)
> > +{
> > +     int ret;
> > +
> > +     /*
> > +      * Even for writes we don't need to allocate blocks, so just pretend
> > +      * we are reading to save overhead of starting a transaction.
> > +      */
> > +     flags &= ~IOMAP_WRITE;
> > +     ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap);
> > +     WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
> > +     return ret;
> > +}
> > +
> >  const struct iomap_ops f2fs_iomap_ops = {
> >       .iomap_begin    = f2fs_iomap_begin,
> >  };
> > +
> > +const struct iomap_ops f2fs_iomap_overwrite_ops = {
> > +     .iomap_begin    = f2fs_iomap_overwrite_begin,
> > +};
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index c6cc2694f9ac..0511ab5ed42a 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void);
> >  int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
> >  void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
> >  extern const struct iomap_ops f2fs_iomap_ops;
> > +extern const struct iomap_ops f2fs_iomap_overwrite_ops;
> >
> >  static inline struct page *f2fs_find_data_page(struct inode *inode,
> >               pgoff_t index, pgoff_t *next_pgofs)
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 82b21baf5628..bb2fe6dac9b6 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> >       const ssize_t count = iov_iter_count(from);
> >       unsigned int dio_flags;
> >       struct iomap_dio *dio;
> > +     const struct iomap_ops *iomap_ops = &f2fs_iomap_ops;
> >       ssize_t ret;
> >
> >       trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
> > @@ -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> >       dio_flags = 0;
> >       if (pos + count > inode->i_size)
> >               dio_flags |= IOMAP_DIO_FORCE_WAIT;
> > -     dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
> > +     else if (f2fs_overwrite_io(inode, pos, count))
> > +             iomap_ops = &f2fs_iomap_overwrite_ops;
> > +     dio = __iomap_dio_rw(iocb, from, iomap_ops,
> >                            &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
> >       if (IS_ERR_OR_NULL(dio)) {
> >               ret = PTR_ERR_OR_ZERO(dio);

I think we can add the overwrite check in f2fs_iomap_begin() before
setting the map.m_may_create, instead of adding a new function
f2fs_iomap_overwrite_begin().
What do you think?

>
yohan.joung@sk.com March 11, 2025, 11:23 p.m. UTC | #3
> From: Daeho Jeong <daeho43@gmail.com>
> Sent: Wednesday, March 12, 2025 6:14 AM
> To: Chao Yu <chao@kernel.org>
> Cc: Yohan Joung <jyh429@gmail.com>; jaegeuk@kernel.org; linux-f2fs-
> devel@lists.sourceforge.net; linux-kernel@vger.kernel.org; 정요한(JOUNG
> YOHAN) Mobile AE <yohan.joung@sk.com>
> Subject: [External Mail] Re: [PATCH] f2fs: optimize f2fs DIO overwrites
> 
> On Tue, Mar 11, 2025 at 5:00 AM Chao Yu <chao@kernel.org> wrote:
> >
> > On 3/7/25 22:56, Yohan Joung wrote:
> > > this is unnecessary when we know we are overwriting already
> > > allocated blocks and the overhead of starting a transaction can be
> > > significant especially for multithreaded workloads doing small writes.
> >
> > Hi Yohan,
> >
> > So you're trying to avoid f2fs_map_lock() in dio write path, right?
> >
> > Thanks,
> >
> > >
> > > Signed-off-by: Yohan Joung <yohan.joung@sk.com>
> > > ---
> > > >  fs/f2fs/data.c | 20 ++++++++++++++++++++
> > > >  fs/f2fs/f2fs.h |  1 +
> > > >  fs/f2fs/file.c |  5 ++++-
> > >  3 files changed, 25 insertions(+), 1 deletion(-)
> > >
> > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > index 09437dbd1b42..728630037b74 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode,
> loff_t offset, loff_t length,
> > >       return 0;
> > >  }
> > >
> > > > +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset,
> > > +             loff_t length, unsigned flags, struct iomap *iomap,
> > > +             struct iomap *srcmap)
> > > +{
> > > +     int ret;
> > > +
> > > +     /*
> > > > +      * Even for writes we don't need to allocate blocks, so just pretend
> > > +      * we are reading to save overhead of starting a transaction.
> > > +      */
> > > +     flags &= ~IOMAP_WRITE;
> > > > +     ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap);
> > > +     WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
> > > +     return ret;
> > > +}
> > > +
> > >  const struct iomap_ops f2fs_iomap_ops = {
> > >       .iomap_begin    = f2fs_iomap_begin,
> > >  };
> > > +
> > > +const struct iomap_ops f2fs_iomap_overwrite_ops = {
> > > +     .iomap_begin    = f2fs_iomap_overwrite_begin,
> > > +};
> > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > index c6cc2694f9ac..0511ab5ed42a 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void);
> > > >  int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
> > > >  void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
> > > >  extern const struct iomap_ops f2fs_iomap_ops;
> > > +extern const struct iomap_ops f2fs_iomap_overwrite_ops;
> > >
> > >  static inline struct page *f2fs_find_data_page(struct inode *inode,
> > > >               pgoff_t index, pgoff_t *next_pgofs)
> > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > index 82b21baf5628..bb2fe6dac9b6 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb
> *iocb, struct iov_iter *from,
> > >       const ssize_t count = iov_iter_count(from);
> > >       unsigned int dio_flags;
> > >       struct iomap_dio *dio;
> > > +     const struct iomap_ops *iomap_ops = &f2fs_iomap_ops;
> > >       ssize_t ret;
> > >
> > > >       trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
> > > > @@ -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> > >       dio_flags = 0;
> > >       if (pos + count > inode->i_size)
> > >               dio_flags |= IOMAP_DIO_FORCE_WAIT;
> > > -     dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
> > > +     else if (f2fs_overwrite_io(inode, pos, count))
> > > +             iomap_ops = &f2fs_iomap_overwrite_ops;
> > > +     dio = __iomap_dio_rw(iocb, from, iomap_ops,
> > >                            &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
> > >       if (IS_ERR_OR_NULL(dio)) {
> > >               ret = PTR_ERR_OR_ZERO(dio);
> 
> I think we can add the overwrite check in f2fs_iomap_begin() before
> setting the map.m_may_create, instead of adding a new function
> f2fs_iomap_overwrite_begin().
> What do you think?
Daeho, is this the way you want it changed? If so, I'll upload it like this:
static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct f2fs_map_blocks map = {};
	pgoff_t next_pgofs = 0;
	int err;

	map.m_lblk = F2FS_BYTES_TO_BLK(offset);
	map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
						inode->i_write_hint);
	if ((flags & IOMAP_WRITE) && !f2fs_overwrite_io(inode, offset, length))
		map.m_may_create = true;

> 
> >
diff mbox series

Patch

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 09437dbd1b42..728630037b74 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4267,6 +4267,26 @@  static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	return 0;
 }
 
+static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset,
+		loff_t length, unsigned flags, struct iomap *iomap,
+		struct iomap *srcmap)
+{
+	int ret;
+
+	/*
+	 * Even for writes we don't need to allocate blocks, so just pretend
+	 * we are reading to save overhead of starting a transaction.
+	 */
+	flags &= ~IOMAP_WRITE;
+	ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap);
+	WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
+	return ret;
+}
+
 const struct iomap_ops f2fs_iomap_ops = {
 	.iomap_begin	= f2fs_iomap_begin,
 };
+
+const struct iomap_ops f2fs_iomap_overwrite_ops = {
+	.iomap_begin	= f2fs_iomap_overwrite_begin,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c6cc2694f9ac..0511ab5ed42a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3936,6 +3936,7 @@  void f2fs_destroy_post_read_processing(void);
 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
 extern const struct iomap_ops f2fs_iomap_ops;
+extern const struct iomap_ops f2fs_iomap_overwrite_ops;
 
 static inline struct page *f2fs_find_data_page(struct inode *inode,
 		pgoff_t index, pgoff_t *next_pgofs)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 82b21baf5628..bb2fe6dac9b6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4985,6 +4985,7 @@  static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 	const ssize_t count = iov_iter_count(from);
 	unsigned int dio_flags;
 	struct iomap_dio *dio;
+	const struct iomap_ops *iomap_ops = &f2fs_iomap_ops;
 	ssize_t ret;
 
 	trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
@@ -5025,7 +5026,9 @@  static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 	dio_flags = 0;
 	if (pos + count > inode->i_size)
 		dio_flags |= IOMAP_DIO_FORCE_WAIT;
-	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
+	else if (f2fs_overwrite_io(inode, pos, count))
+		iomap_ops = &f2fs_iomap_overwrite_ops;
+	dio = __iomap_dio_rw(iocb, from, iomap_ops,
 			     &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
 	if (IS_ERR_OR_NULL(dio)) {
 		ret = PTR_ERR_OR_ZERO(dio);