Message ID | 20220526173840.578265-5-shr@fb.com (mailing list archive) |
---|---|
State | Deferred, archived |
Headers | show |
Series | io-uring/xfs: support async buffered writes | expand |
On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: > Add the kiocb flags parameter to the function iomap_page_create(). > Depending on the value of the flags parameter it enables different gfp > flags. > > No intended functional changes in this patch. > > Signed-off-by: Stefan Roesch <shr@fb.com> > Reviewed-by: Jan Kara <jack@suse.cz> > --- > fs/iomap/buffered-io.c | 19 +++++++++++++------ > 1 file changed, 13 insertions(+), 6 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 8ce8720093b9..d6ddc54e190e 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -44,16 +44,21 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) > static struct bio_set iomap_ioend_bioset; > > static struct iomap_page * > -iomap_page_create(struct inode *inode, struct folio *folio) > +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags) > { > struct iomap_page *iop = to_iomap_page(folio); > unsigned int nr_blocks = i_blocks_per_folio(inode, folio); > + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; > > if (iop || nr_blocks <= 1) > return iop; > > + if (flags & IOMAP_NOWAIT) > + gfp = GFP_NOWAIT; Hmm. GFP_NOWAIT means we don't wait for reclaim or IO or filesystem callbacks, and NOFAIL means we retry indefinitely. What happens in the NOWAIT|NOFAIL case? Does that imply that the kzalloc loops without triggering direct reclaim until someone else frees enough memory? --D > + > iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)), > - GFP_NOFS | __GFP_NOFAIL); > + gfp); > + > spin_lock_init(&iop->uptodate_lock); > if (folio_test_uptodate(folio)) > bitmap_fill(iop->uptodate, nr_blocks); > @@ -226,7 +231,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, > if (WARN_ON_ONCE(size > iomap->length)) > return -EIO; > if (offset > 0) > - iop = iomap_page_create(iter->inode, folio); > + iop = iomap_page_create(iter->inode, folio, iter->flags); > else > iop = to_iomap_page(folio); > > @@ -264,7 +269,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, > return iomap_read_inline_data(iter, folio); > > /* zero post-eof blocks as the page may be mapped */ > - iop = iomap_page_create(iter->inode, folio); > + iop = iomap_page_create(iter->inode, folio, iter->flags); > iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); > if (plen == 0) > goto done; > @@ -550,7 +555,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, > size_t len, struct folio *folio) > { > const struct iomap *srcmap = iomap_iter_srcmap(iter); > - struct iomap_page *iop = iomap_page_create(iter->inode, folio); > + struct iomap_page *iop; > loff_t block_size = i_blocksize(iter->inode); > loff_t block_start = round_down(pos, block_size); > loff_t block_end = round_up(pos + len, block_size); > @@ -561,6 +566,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, > return 0; > folio_clear_error(folio); > > + iop = iomap_page_create(iter->inode, folio, iter->flags); > + > do { > iomap_adjust_read_range(iter->inode, folio, &block_start, > block_end - block_start, &poff, &plen); > @@ -1332,7 +1339,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, > struct writeback_control *wbc, struct inode *inode, > struct folio *folio, u64 end_pos) > { > - struct iomap_page *iop = iomap_page_create(inode, folio); > + struct iomap_page *iop = iomap_page_create(inode, folio, 0); > struct iomap_ioend *ioend, *next; > unsigned len = i_blocksize(inode); > unsigned nblocks = i_blocks_per_folio(inode, folio); > -- > 2.30.2 >
On 5/26/22 11:25 AM, Darrick J. Wong wrote: > On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: >> Add the kiocb flags parameter to the function iomap_page_create(). >> Depending on the value of the flags parameter it enables different gfp >> flags. >> >> No intended functional changes in this patch. >> >> Signed-off-by: Stefan Roesch <shr@fb.com> >> Reviewed-by: Jan Kara <jack@suse.cz> >> --- >> fs/iomap/buffered-io.c | 19 +++++++++++++------ >> 1 file changed, 13 insertions(+), 6 deletions(-) >> >> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c >> index 8ce8720093b9..d6ddc54e190e 100644 >> --- a/fs/iomap/buffered-io.c >> +++ b/fs/iomap/buffered-io.c >> @@ -44,16 +44,21 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) >> static struct bio_set iomap_ioend_bioset; >> >> static struct iomap_page * >> -iomap_page_create(struct inode *inode, struct folio *folio) >> +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags) >> { >> struct iomap_page *iop = to_iomap_page(folio); >> unsigned int nr_blocks = i_blocks_per_folio(inode, folio); >> + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; >> >> if (iop || nr_blocks <= 1) >> return iop; >> >> + if (flags & IOMAP_NOWAIT) >> + gfp = GFP_NOWAIT; > > Hmm. GFP_NOWAIT means we don't wait for reclaim or IO or filesystem > callbacks, and NOFAIL means we retry indefinitely. What happens in the > NOWAIT|NOFAIL case? Does that imply that the kzalloc loops without > triggering direct reclaim until someone else frees enough memory? > Before this patch all requests allocate memory with the GFP_NOFS | __GFP_NOFAIL flags. With this patch no_wait requests will be allocated with GFP_NOWAIT. I don't see how allocations will happen with GFP_NOWAIT| __GFP_NOFAIL. If an allocation with GFP_NOWAIT fails. It will return -EAGAIN, for the write code path. In io-uring, the write request will be punted to the io-worker. The io-worker will process the write request, but nowait will not be specified. > --D > >> + >> iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)), >> - GFP_NOFS | __GFP_NOFAIL); >> + gfp); >> + >> spin_lock_init(&iop->uptodate_lock); >> if (folio_test_uptodate(folio)) >> bitmap_fill(iop->uptodate, nr_blocks); >> @@ -226,7 +231,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, >> if (WARN_ON_ONCE(size > iomap->length)) >> return -EIO; >> if (offset > 0) >> - iop = iomap_page_create(iter->inode, folio); >> + iop = iomap_page_create(iter->inode, folio, iter->flags); >> else >> iop = to_iomap_page(folio); >> >> @@ -264,7 +269,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, >> return iomap_read_inline_data(iter, folio); >> >> /* zero post-eof blocks as the page may be mapped */ >> - iop = iomap_page_create(iter->inode, folio); >> + iop = iomap_page_create(iter->inode, folio, iter->flags); >> iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); >> if (plen == 0) >> goto done; >> @@ -550,7 +555,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, >> size_t len, struct folio *folio) >> { >> const struct iomap *srcmap = iomap_iter_srcmap(iter); >> - struct iomap_page *iop = iomap_page_create(iter->inode, folio); >> + struct iomap_page *iop; >> loff_t block_size = i_blocksize(iter->inode); >> loff_t block_start = round_down(pos, block_size); >> loff_t block_end = round_up(pos + len, block_size); >> @@ -561,6 +566,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, >> return 0; >> folio_clear_error(folio); >> >> + iop = iomap_page_create(iter->inode, folio, iter->flags); >> + >> do { >> iomap_adjust_read_range(iter->inode, folio, &block_start, >> block_end - block_start, &poff, &plen); >> @@ -1332,7 +1339,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, >> struct writeback_control *wbc, struct inode *inode, >> struct folio *folio, u64 end_pos) >> { >> - struct iomap_page *iop = iomap_page_create(inode, folio); >> + struct iomap_page *iop = iomap_page_create(inode, folio, 0); >> struct iomap_ioend *ioend, *next; >> unsigned len = i_blocksize(inode); >> unsigned nblocks = i_blocks_per_folio(inode, folio); >> -- >> 2.30.2 >>
On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: > Add the kiocb flags parameter to the function iomap_page_create(). > Depending on the value of the flags parameter it enables different gfp > flags. > > No intended functional changes in this patch. > > Signed-off-by: Stefan Roesch <shr@fb.com> > Reviewed-by: Jan Kara <jack@suse.cz> > --- > fs/iomap/buffered-io.c | 19 +++++++++++++------ > 1 file changed, 13 insertions(+), 6 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 8ce8720093b9..d6ddc54e190e 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -44,16 +44,21 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) > static struct bio_set iomap_ioend_bioset; > > static struct iomap_page * > -iomap_page_create(struct inode *inode, struct folio *folio) > +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags) > { > struct iomap_page *iop = to_iomap_page(folio); > unsigned int nr_blocks = i_blocks_per_folio(inode, folio); > + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; > > if (iop || nr_blocks <= 1) > return iop; > > + if (flags & IOMAP_NOWAIT) > + gfp = GFP_NOWAIT; > + Maybe this would confuse people less if it was: if (flags & IOMAP_NOWAIT) gfp = GFP_NOWAIT; else gfp = GFP_NOFS | __GFP_NOFAIL; but even as is it is perfectly fine (and I tend to write these kinds of shortcuts as well). Looks good either way: Reviewed-by: Christoph Hellwig <hch@lst.de>
On 5/30/22 11:54 PM, Christoph Hellwig wrote: > On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: >> Add the kiocb flags parameter to the function iomap_page_create(). >> Depending on the value of the flags parameter it enables different gfp >> flags. >> >> No intended functional changes in this patch. >> >> Signed-off-by: Stefan Roesch <shr@fb.com> >> Reviewed-by: Jan Kara <jack@suse.cz> >> --- >> fs/iomap/buffered-io.c | 19 +++++++++++++------ >> 1 file changed, 13 insertions(+), 6 deletions(-) >> >> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c >> index 8ce8720093b9..d6ddc54e190e 100644 >> --- a/fs/iomap/buffered-io.c >> +++ b/fs/iomap/buffered-io.c >> @@ -44,16 +44,21 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) >> static struct bio_set iomap_ioend_bioset; >> >> static struct iomap_page * >> -iomap_page_create(struct inode *inode, struct folio *folio) >> +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags) >> { >> struct iomap_page *iop = to_iomap_page(folio); >> unsigned int nr_blocks = i_blocks_per_folio(inode, folio); >> + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; >> >> if (iop || nr_blocks <= 1) >> return iop; >> >> + if (flags & IOMAP_NOWAIT) >> + gfp = GFP_NOWAIT; >> + > > Maybe this would confuse people less if it was: > > if (flags & IOMAP_NOWAIT) > gfp = GFP_NOWAIT; > else > gfp = GFP_NOFS | __GFP_NOFAIL; > I made the above change. > but even as is it is perfectly fine (and I tend to write these kinds of > shortcuts as well). > > Looks good either way: > > Reviewed-by: Christoph Hellwig <hch@lst.de>
On Thu, 2022-05-26 at 11:25 -0700, Darrick J. Wong wrote: > On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: > > > > static struct iomap_page * > > -iomap_page_create(struct inode *inode, struct folio *folio) > > +iomap_page_create(struct inode *inode, struct folio *folio, > > unsigned int flags) > > { > > struct iomap_page *iop = to_iomap_page(folio); > > unsigned int nr_blocks = i_blocks_per_folio(inode, folio); > > + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; > > > > if (iop || nr_blocks <= 1) > > return iop; > > > > + if (flags & IOMAP_NOWAIT) > > + gfp = GFP_NOWAIT; > > Hmm. GFP_NOWAIT means we don't wait for reclaim or IO or filesystem > callbacks, and NOFAIL means we retry indefinitely. What happens in > the > NOWAIT|NOFAIL case? Does that imply that the kzalloc loops without > triggering direct reclaim until someone else frees enough memory? > > --D I have a question that is a bit offtopic but since it is concerning GFP flags and this is what is discussed here maybe a participant will kindly give me some hints about this mystery that has burned me for so long... Why does out_of_memory() requires GFP_FS to kill a process? AFAIK, no filesystem-dependent operations are needed to kill a process...
On Tue 31-05-22 20:34:20, Olivier Langlois wrote: > On Thu, 2022-05-26 at 11:25 -0700, Darrick J. Wong wrote: > > On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: > > > > > > static struct iomap_page * > > > -iomap_page_create(struct inode *inode, struct folio *folio) > > > +iomap_page_create(struct inode *inode, struct folio *folio, > > > unsigned int flags) > > > { > > > struct iomap_page *iop = to_iomap_page(folio); > > > unsigned int nr_blocks = i_blocks_per_folio(inode, folio); > > > + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; > > > > > > if (iop || nr_blocks <= 1) > > > return iop; > > > > > > + if (flags & IOMAP_NOWAIT) > > > + gfp = GFP_NOWAIT; > > > > Hmm. GFP_NOWAIT means we don't wait for reclaim or IO or filesystem > > callbacks, and NOFAIL means we retry indefinitely. What happens in > > the > > NOWAIT|NOFAIL case? Does that imply that the kzalloc loops without > > triggering direct reclaim until someone else frees enough memory? > > > > --D > > I have a question that is a bit offtopic but since it is concerning GFP > flags and this is what is discussed here maybe a participant will > kindly give me some hints about this mystery that has burned me for so > long... > > Why does out_of_memory() requires GFP_FS to kill a process? AFAIK, no > filesystem-dependent operations are needed to kill a process... AFAIK it is because without GFP_FS, the chances for direct reclaim are fairly limited so we are not sure whether the machine is indeed out of memory or whether it is just that we need to reclaim from fs pools to free up memory. Honza
On Wed, 2022-06-01 at 10:21 +0200, Jan Kara wrote: > > I have a question that is a bit offtopic but since it is concerning > > GFP > > flags and this is what is discussed here maybe a participant will > > kindly give me some hints about this mystery that has burned me for > > so > > long... > > > > Why does out_of_memory() requires GFP_FS to kill a process? AFAIK, > > no > > filesystem-dependent operations are needed to kill a process... > > AFAIK it is because without GFP_FS, the chances for direct reclaim > are > fairly limited so we are not sure whether the machine is indeed out > of > memory or whether it is just that we need to reclaim from fs pools to > free > up memory. > > Honza Jan, thx a lot for this lead. I will study it further. Your answer made me realized that the meaning of direct reclaim was not crystal clear to me. I'll return to my Bovet & Cesati book to clear that out (That is probably the book in my bookshelf that I have read the most). After having sending out my question, I have came up with another possible explanation... Maybe it is not so much to send the killing signal to the oom victim that requires GFP_FS but maybe the trouble the condition is avoiding is the oom victim process trying to return memory to VFS as it exits while VFS is busy waiting for its allocation request to succeed...
On Tue, May 31, 2022 at 11:12:38AM -0700, Stefan Roesch wrote: > > > On 5/30/22 11:54 PM, Christoph Hellwig wrote: > > On Thu, May 26, 2022 at 10:38:28AM -0700, Stefan Roesch wrote: > >> Add the kiocb flags parameter to the function iomap_page_create(). > >> Depending on the value of the flags parameter it enables different gfp > >> flags. > >> > >> No intended functional changes in this patch. > >> > >> Signed-off-by: Stefan Roesch <shr@fb.com> > >> Reviewed-by: Jan Kara <jack@suse.cz> > >> --- > >> fs/iomap/buffered-io.c | 19 +++++++++++++------ > >> 1 file changed, 13 insertions(+), 6 deletions(-) > >> > >> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > >> index 8ce8720093b9..d6ddc54e190e 100644 > >> --- a/fs/iomap/buffered-io.c > >> +++ b/fs/iomap/buffered-io.c > >> @@ -44,16 +44,21 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) > >> static struct bio_set iomap_ioend_bioset; > >> > >> static struct iomap_page * > >> -iomap_page_create(struct inode *inode, struct folio *folio) > >> +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags) > >> { > >> struct iomap_page *iop = to_iomap_page(folio); > >> unsigned int nr_blocks = i_blocks_per_folio(inode, folio); > >> + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; > >> > >> if (iop || nr_blocks <= 1) > >> return iop; > >> > >> + if (flags & IOMAP_NOWAIT) > >> + gfp = GFP_NOWAIT; > >> + > > > > Maybe this would confuse people less if it was: > > > > if (flags & IOMAP_NOWAIT) > > gfp = GFP_NOWAIT; > > else > > gfp = GFP_NOFS | __GFP_NOFAIL; > > > > I made the above change. Thanks. I misread all the gfp handling as: gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; if (flags & IOMAP_NOWAIT) gfp |= GFP_NOWAIT; Which was why my question did not make sense. Sorry about that. :( --D > > > but even as is it is perfectly fine (and I tend to write these kinds of > > shortcuts as well). > > > > Looks good either way: > > > > Reviewed-by: Christoph Hellwig <hch@lst.de>
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 8ce8720093b9..d6ddc54e190e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -44,16 +44,21 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) static struct bio_set iomap_ioend_bioset; static struct iomap_page * -iomap_page_create(struct inode *inode, struct folio *folio) +iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags) { struct iomap_page *iop = to_iomap_page(folio); unsigned int nr_blocks = i_blocks_per_folio(inode, folio); + gfp_t gfp = GFP_NOFS | __GFP_NOFAIL; if (iop || nr_blocks <= 1) return iop; + if (flags & IOMAP_NOWAIT) + gfp = GFP_NOWAIT; + iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)), - GFP_NOFS | __GFP_NOFAIL); + gfp); + spin_lock_init(&iop->uptodate_lock); if (folio_test_uptodate(folio)) bitmap_fill(iop->uptodate, nr_blocks); @@ -226,7 +231,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, if (WARN_ON_ONCE(size > iomap->length)) return -EIO; if (offset > 0) - iop = iomap_page_create(iter->inode, folio); + iop = iomap_page_create(iter->inode, folio, iter->flags); else iop = to_iomap_page(folio); @@ -264,7 +269,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, return iomap_read_inline_data(iter, folio); /* zero post-eof blocks as the page may be mapped */ - iop = iomap_page_create(iter->inode, folio); + iop = iomap_page_create(iter->inode, folio, iter->flags); iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); if (plen == 0) goto done; @@ -550,7 +555,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, size_t len, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - struct iomap_page *iop = iomap_page_create(iter->inode, folio); + struct iomap_page *iop; loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); @@ -561,6 +566,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, return 0; folio_clear_error(folio); + iop = iomap_page_create(iter->inode, folio, iter->flags); + do { iomap_adjust_read_range(iter->inode, folio, &block_start, block_end - block_start, &poff, &plen); @@ -1332,7 +1339,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, struct folio *folio, u64 end_pos) { - struct iomap_page *iop = iomap_page_create(inode, folio); + struct iomap_page *iop = iomap_page_create(inode, folio, 0); struct iomap_ioend *ioend, *next; unsigned len = i_blocksize(inode); unsigned nblocks = i_blocks_per_folio(inode, folio);