Message ID | 20250307145650.568-1-yohan.joung@sk.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [f2fs-dev] f2fs: optimize f2fs DIO overwrites | expand |
On 3/7/25 22:56, Yohan Joung wrote: > this is unnecessary when we know we are overwriting already allocated > blocks and the overhead of starting a transaction can be significant > especially for multithreaded workloads doing small writes. Hi Yohan, So you're trying to avoid f2fs_map_lock() in dio write path, right? Thanks, > > Signed-off-by: Yohan Joung <yohan.joung@sk.com> > --- > fs/f2fs/data.c | 20 ++++++++++++++++++++ > fs/f2fs/f2fs.h | 1 + > fs/f2fs/file.c | 5 ++++- > 3 files changed, 25 insertions(+), 1 deletion(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index 09437dbd1b42..728630037b74 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > return 0; > } > > +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset, > + loff_t length, unsigned flags, struct iomap *iomap, > + struct iomap *srcmap) > +{ > + int ret; > + > + /* > + * Even for writes we don't need to allocate blocks, so just pretend > + * we are reading to save overhead of starting a transaction. 
> + */ > + flags &= ~IOMAP_WRITE; > + ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap); > + WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); > + return ret; > +} > + > const struct iomap_ops f2fs_iomap_ops = { > .iomap_begin = f2fs_iomap_begin, > }; > + > +const struct iomap_ops f2fs_iomap_overwrite_ops = { > + .iomap_begin = f2fs_iomap_overwrite_begin, > +}; > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index c6cc2694f9ac..0511ab5ed42a 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void); > int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); > void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); > extern const struct iomap_ops f2fs_iomap_ops; > +extern const struct iomap_ops f2fs_iomap_overwrite_ops; > > static inline struct page *f2fs_find_data_page(struct inode *inode, > pgoff_t index, pgoff_t *next_pgofs) > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 82b21baf5628..bb2fe6dac9b6 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > const ssize_t count = iov_iter_count(from); > unsigned int dio_flags; > struct iomap_dio *dio; > + const struct iomap_ops *iomap_ops = &f2fs_iomap_ops; > ssize_t ret; > > trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); > @@ -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > dio_flags = 0; > if (pos + count > inode->i_size) > dio_flags |= IOMAP_DIO_FORCE_WAIT; > - dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, > + else if (f2fs_overwrite_io(inode, pos, count)) > + iomap_ops = &f2fs_iomap_overwrite_ops; > + dio = __iomap_dio_rw(iocb, from, iomap_ops, > &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); > if (IS_ERR_OR_NULL(dio)) { > ret = PTR_ERR_OR_ZERO(dio);
On Tue, Mar 11, 2025 at 5:00 AM Chao Yu <chao@kernel.org> wrote: > > On 3/7/25 22:56, Yohan Joung wrote: > > this is unnecessary when we know we are overwriting already allocated > > blocks and the overhead of starting a transaction can be significant > > especially for multithreaded workloads doing small writes. > > Hi Yohan, > > So you're trying to avoid f2fs_map_lock() in dio write path, right? > > Thanks, > > > > > Signed-off-by: Yohan Joung <yohan.joung@sk.com> > > --- > > fs/f2fs/data.c | 20 ++++++++++++++++++++ > > fs/f2fs/f2fs.h | 1 + > > fs/f2fs/file.c | 5 ++++- > > 3 files changed, 25 insertions(+), 1 deletion(-) > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > > index 09437dbd1b42..728630037b74 100644 > > --- a/fs/f2fs/data.c > > +++ b/fs/f2fs/data.c > > @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > > return 0; > > } > > > > +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset, > > + loff_t length, unsigned flags, struct iomap *iomap, > > + struct iomap *srcmap) > > +{ > > + int ret; > > + > > + /* > > + * Even for writes we don't need to allocate blocks, so just pretend > > + * we are reading to save overhead of starting a transaction. 
> > + */ > > + flags &= ~IOMAP_WRITE; > > + ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap); > > + WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); > > + return ret; > > +} > > + > > const struct iomap_ops f2fs_iomap_ops = { > > .iomap_begin = f2fs_iomap_begin, > > }; > > + > > +const struct iomap_ops f2fs_iomap_overwrite_ops = { > > + .iomap_begin = f2fs_iomap_overwrite_begin, > > +}; > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index c6cc2694f9ac..0511ab5ed42a 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void); > > int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); > > void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); > > extern const struct iomap_ops f2fs_iomap_ops; > > +extern const struct iomap_ops f2fs_iomap_overwrite_ops; > > > > static inline struct page *f2fs_find_data_page(struct inode *inode, > > pgoff_t index, pgoff_t *next_pgofs) > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > > index 82b21baf5628..bb2fe6dac9b6 100644 > > --- a/fs/f2fs/file.c > > +++ b/fs/f2fs/file.c > > @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > > const ssize_t count = iov_iter_count(from); > > unsigned int dio_flags; > > struct iomap_dio *dio; > > + const struct iomap_ops *iomap_ops = &f2fs_iomap_ops; > > ssize_t ret; > > > > trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); > > @@ -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, > > dio_flags = 0; > > if (pos + count > inode->i_size) > > dio_flags |= IOMAP_DIO_FORCE_WAIT; > > - dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, > > + else if (f2fs_overwrite_io(inode, pos, count)) > > + iomap_ops = &f2fs_iomap_overwrite_ops; > > + dio = __iomap_dio_rw(iocb, from, iomap_ops, > > &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); > > if (IS_ERR_OR_NULL(dio)) { > > ret = PTR_ERR_OR_ZERO(dio); I think we can add 
the overwrite check in f2fs_iomap_begin() before setting map.m_may_create, instead of adding a new function f2fs_iomap_overwrite_begin(). What do you think? >
> From: Daeho Jeong <daeho43@gmail.com> > Sent: Wednesday, March 12, 2025 6:14 AM > To: Chao Yu <chao@kernel.org> > Cc: Yohan Joung <jyh429@gmail.com>; jaegeuk@kernel.org; linux-f2fs- > devel@lists.sourceforge.net; linux-kernel@vger.kernel.org; 정요한(JOUNG > YOHAN) Mobile AE <yohan.joung@sk.com> > Subject: [External Mail] Re: [PATCH] f2fs: optimize f2fs DIO overwrites > > On Tue, Mar 11, 2025 at 5:00 AM Chao Yu <chao@kernel.org> wrote: > > > > On 3/7/25 22:56, Yohan Joung wrote: > > > this is unnecessary when we know we are overwriting already > > > allocated blocks and the overhead of starting a transaction can be > > > significant especially for multithreaded workloads doing small writes. > > > > Hi Yohan, > > > > So you're trying to avoid f2fs_map_lock() in dio write path, right? > > > > Thanks, > > > > > > > > Signed-off-by: Yohan Joung <yohan.joung@sk.com> > > > --- > > > fs/f2fs/data.c | 20 ++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + > > > fs/f2fs/file.c | 5 ++++- > > > 3 files changed, 25 insertions(+), 1 deletion(-) > > > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index > > > 09437dbd1b42..728630037b74 100644 > > > --- a/fs/f2fs/data.c > > > +++ b/fs/f2fs/data.c > > > @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode, > loff_t offset, loff_t length, > > > return 0; > > > } > > > > > > +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t > offset, > > > + loff_t length, unsigned flags, struct iomap *iomap, > > > + struct iomap *srcmap) > > > +{ > > > + int ret; > > > + > > > + /* > > > + * Even for writes we don't need to allocate blocks, so just > pretend > > > + * we are reading to save overhead of starting a transaction. 
> > > + */ > > > + flags &= ~IOMAP_WRITE; > > > + ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, > srcmap); > > > + WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); > > > + return ret; > > > +} > > > + > > > const struct iomap_ops f2fs_iomap_ops = { > > > .iomap_begin = f2fs_iomap_begin, > > > }; > > > + > > > +const struct iomap_ops f2fs_iomap_overwrite_ops = { > > > + .iomap_begin = f2fs_iomap_overwrite_begin, > > > +}; > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index > > > c6cc2694f9ac..0511ab5ed42a 100644 > > > --- a/fs/f2fs/f2fs.h > > > +++ b/fs/f2fs/f2fs.h > > > @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void); > > > int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); void > > > f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); extern const > > > struct iomap_ops f2fs_iomap_ops; > > > +extern const struct iomap_ops f2fs_iomap_overwrite_ops; > > > > > > static inline struct page *f2fs_find_data_page(struct inode *inode, > > > pgoff_t index, pgoff_t *next_pgofs) diff --git > > > a/fs/f2fs/file.c b/fs/f2fs/file.c index 82b21baf5628..bb2fe6dac9b6 > > > 100644 > > > --- a/fs/f2fs/file.c > > > +++ b/fs/f2fs/file.c > > > @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb > *iocb, struct iov_iter *from, > > > const ssize_t count = iov_iter_count(from); > > > unsigned int dio_flags; > > > struct iomap_dio *dio; > > > + const struct iomap_ops *iomap_ops = &f2fs_iomap_ops; > > > ssize_t ret; > > > > > > trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); @@ > > > -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb > *iocb, struct iov_iter *from, > > > dio_flags = 0; > > > if (pos + count > inode->i_size) > > > dio_flags |= IOMAP_DIO_FORCE_WAIT; > > > - dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, > > > + else if (f2fs_overwrite_io(inode, pos, count)) > > > + iomap_ops = &f2fs_iomap_overwrite_ops; > > > + dio = __iomap_dio_rw(iocb, from, iomap_ops, > > > &f2fs_iomap_dio_write_ops, 
dio_flags, NULL, 0); > > > if (IS_ERR_OR_NULL(dio)) { > > > ret = PTR_ERR_OR_ZERO(dio); > > I think we can add the overwrite check in f2fs_iomap_begin() before > setting the map.m_may_create, instead of adding a new function > f2fs_iomap_overwrite_begin(). > What do you think? Daeho, Is this the way you want it changed? If so, I'll upload it like this static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct f2fs_map_blocks map = {}; pgoff_t next_pgofs = 0; int err; map.m_lblk = F2FS_BYTES_TO_BLK(offset); map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1; map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); if ((flags & IOMAP_WRITE) && !f2fs_overwrite_io(inode, offset, length)) map.m_may_create = true; > > >
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 09437dbd1b42..728630037b74 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4267,6 +4267,26 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return 0; } +static int f2fs_iomap_overwrite_begin(struct inode *inode, loff_t offset, + loff_t length, unsigned flags, struct iomap *iomap, + struct iomap *srcmap) +{ + int ret; + + /* + * Even for writes we don't need to allocate blocks, so just pretend + * we are reading to save overhead of starting a transaction. + */ + flags &= ~IOMAP_WRITE; + ret = f2fs_iomap_begin(inode, offset, length, flags, iomap, srcmap); + WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); + return ret; +} + const struct iomap_ops f2fs_iomap_ops = { .iomap_begin = f2fs_iomap_begin, }; + +const struct iomap_ops f2fs_iomap_overwrite_ops = { + .iomap_begin = f2fs_iomap_overwrite_begin, +}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c6cc2694f9ac..0511ab5ed42a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3936,6 +3936,7 @@ void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); extern const struct iomap_ops f2fs_iomap_ops; +extern const struct iomap_ops f2fs_iomap_overwrite_ops; static inline struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, pgoff_t *next_pgofs) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 82b21baf5628..bb2fe6dac9b6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4985,6 +4985,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, const ssize_t count = iov_iter_count(from); unsigned int dio_flags; struct iomap_dio *dio; + const struct iomap_ops *iomap_ops = &f2fs_iomap_ops; ssize_t ret; trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); @@ -5025,7 +5026,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, dio_flags = 0; if (pos + count > 
inode->i_size) dio_flags |= IOMAP_DIO_FORCE_WAIT; - dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, + else if (f2fs_overwrite_io(inode, pos, count)) + iomap_ops = &f2fs_iomap_overwrite_ops; + dio = __iomap_dio_rw(iocb, from, iomap_ops, &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio);
Starting a transaction for a DIO write is unnecessary when we know we are overwriting already allocated blocks, and the overhead of starting a transaction can be significant, especially for multithreaded workloads doing small writes. Signed-off-by: Yohan Joung <yohan.joung@sk.com> --- fs/f2fs/data.c | 20 ++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 5 ++++- 3 files changed, 25 insertions(+), 1 deletion(-)