Message ID | 20230825201225.348148-2-willy@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Many folio conversions for ceph | expand |
On 8/26/23 04:12, Matthew Wilcox (Oracle) wrote: > After we iterate through the locked folios using filemap_get_folios_tag(), > we currently convert back to a page (and then in some circumstaces back > to a folio again!). Just use a folio throughout and avoid various hidden > calls to compound_head(). Ceph still uses a page array to interact with > the OSD which should be cleaned up in a subsequent patch. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > fs/ceph/addr.c | 100 ++++++++++++++++++++++++------------------------- > 1 file changed, 49 insertions(+), 51 deletions(-) > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > index f4863078f7fe..9a0a79833eb0 100644 > --- a/fs/ceph/addr.c > +++ b/fs/ceph/addr.c > @@ -1018,7 +1018,7 @@ static int ceph_writepages_start(struct address_space *mapping, > int num_ops = 0, op_idx; > unsigned i, nr_folios, max_pages, locked_pages = 0; > struct page **pages = NULL, **data_pages; > - struct page *page; > + struct folio *folio; > pgoff_t strip_unit_end = 0; > u64 offset = 0, len = 0; > bool from_pool = false; > @@ -1032,22 +1032,22 @@ static int ceph_writepages_start(struct address_space *mapping, > if (!nr_folios && !locked_pages) > break; > for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { > - page = &fbatch.folios[i]->page; > - dout("? %p idx %lu\n", page, page->index); > + folio = fbatch.folios[i]; > + dout("? 
%p idx %lu\n", folio, folio->index); > if (locked_pages == 0) > - lock_page(page); /* first page */ > - else if (!trylock_page(page)) > + folio_lock(folio); /* first folio */ > + else if (!folio_trylock(folio)) > break; > > /* only dirty pages, or our accounting breaks */ > - if (unlikely(!PageDirty(page)) || > - unlikely(page->mapping != mapping)) { > - dout("!dirty or !mapping %p\n", page); > - unlock_page(page); > + if (unlikely(!folio_test_dirty(folio)) || > + unlikely(folio->mapping != mapping)) { > + dout("!dirty or !mapping %p\n", folio); > + folio_unlock(folio); > continue; > } > /* only if matching snap context */ > - pgsnapc = page_snap_context(page); > + pgsnapc = folio->private; > if (pgsnapc != snapc) { > dout("page snapc %p %lld != oldest %p %lld\n", > pgsnapc, pgsnapc->seq, snapc, snapc->seq); > @@ -1055,12 +1055,10 @@ static int ceph_writepages_start(struct address_space *mapping, > !ceph_wbc.head_snapc && > wbc->sync_mode != WB_SYNC_NONE) > should_loop = true; > - unlock_page(page); > + folio_unlock(folio); > continue; > } > - if (page_offset(page) >= ceph_wbc.i_size) { > - struct folio *folio = page_folio(page); > - > + if (folio_pos(folio) >= ceph_wbc.i_size) { > dout("folio at %lu beyond eof %llu\n", > folio->index, ceph_wbc.i_size); > if ((ceph_wbc.size_stable || > @@ -1071,31 +1069,32 @@ static int ceph_writepages_start(struct address_space *mapping, > folio_unlock(folio); > continue; > } > - if (strip_unit_end && (page->index > strip_unit_end)) { > - dout("end of strip unit %p\n", page); > - unlock_page(page); > + if (strip_unit_end && (folio->index > strip_unit_end)) { > + dout("end of strip unit %p\n", folio); > + folio_unlock(folio); > break; > } > - if (PageWriteback(page) || PageFsCache(page)) { > + if (folio_test_writeback(folio) || > + folio_test_fscache(folio)) { > if (wbc->sync_mode == WB_SYNC_NONE) { > - dout("%p under writeback\n", page); > - unlock_page(page); > + dout("%p under writeback\n", folio); > + folio_unlock(folio); > 
continue; > } > - dout("waiting on writeback %p\n", page); > - wait_on_page_writeback(page); > - wait_on_page_fscache(page); > + dout("waiting on writeback %p\n", folio); > + folio_wait_writeback(folio); > + folio_wait_fscache(folio); > } > > - if (!clear_page_dirty_for_io(page)) { > - dout("%p !clear_page_dirty_for_io\n", page); > - unlock_page(page); > + if (!folio_clear_dirty_for_io(folio)) { > + dout("%p !folio_clear_dirty_for_io\n", folio); > + folio_unlock(folio); > continue; > } > > /* > * We have something to write. If this is > - * the first locked page this time through, > + * the first locked folio this time through, > * calculate max possinle write size and > * allocate a page array > */ > @@ -1105,7 +1104,7 @@ static int ceph_writepages_start(struct address_space *mapping, > u32 xlen; > > /* prepare async write request */ > - offset = (u64)page_offset(page); > + offset = folio_pos(folio); > ceph_calc_file_object_mapping(&ci->i_layout, > offset, wsize, > &objnum, &objoff, > @@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct address_space *mapping, > len = xlen; > > num_ops = 1; > - strip_unit_end = page->index + > + strip_unit_end = folio->index + > ((len - 1) >> PAGE_SHIFT); > > BUG_ON(pages); > @@ -1128,23 +1127,23 @@ static int ceph_writepages_start(struct address_space *mapping, > } > > len = 0; > - } else if (page->index != > + } else if (folio->index != > (offset + len) >> PAGE_SHIFT) { > if (num_ops >= (from_pool ? 
CEPH_OSD_SLAB_OPS : > CEPH_OSD_MAX_OPS)) { > - redirty_page_for_writepage(wbc, page); > - unlock_page(page); > + folio_redirty_for_writepage(wbc, folio); > + folio_unlock(folio); > break; > } > > num_ops++; > - offset = (u64)page_offset(page); > + offset = (u64)folio_pos(folio); > len = 0; > } > > /* note position of first page in fbatch */ > - dout("%p will write page %p idx %lu\n", > - inode, page, page->index); > + dout("%p will write folio %p idx %lu\n", > + inode, folio, folio->index); > > if (atomic_long_inc_return(&fsc->writeback_count) > > CONGESTION_ON_THRESH( > @@ -1153,7 +1152,7 @@ static int ceph_writepages_start(struct address_space *mapping, > > if (IS_ENCRYPTED(inode)) { > pages[locked_pages] = > - fscrypt_encrypt_pagecache_blocks(page, > + fscrypt_encrypt_pagecache_blocks(&folio->page, > PAGE_SIZE, 0, > locked_pages ? GFP_NOWAIT : GFP_NOFS); > if (IS_ERR(pages[locked_pages])) { > @@ -1163,17 +1162,17 @@ static int ceph_writepages_start(struct address_space *mapping, > /* better not fail on first page! */ > BUG_ON(locked_pages == 0); > pages[locked_pages] = NULL; > - redirty_page_for_writepage(wbc, page); > - unlock_page(page); > + folio_redirty_for_writepage(wbc, folio); > + folio_unlock(folio); > break; > } > ++locked_pages; > } else { > - pages[locked_pages++] = page; > + pages[locked_pages++] = &folio->page; > } > > fbatch.folios[i] = NULL; > - len += thp_size(page); > + len += folio_size(folio); > } > > /* did we get anything? 
*/ > @@ -1222,7 +1221,7 @@ static int ceph_writepages_start(struct address_space *mapping, > BUG_ON(IS_ERR(req)); > } > BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) + > - thp_size(pages[locked_pages - 1]) - offset); > + folio_size(folio) - offset); > > if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { > rc = -EIO; > @@ -1236,9 +1235,9 @@ static int ceph_writepages_start(struct address_space *mapping, > data_pages = pages; > op_idx = 0; > for (i = 0; i < locked_pages; i++) { > - struct page *page = ceph_fscrypt_pagecache_page(pages[i]); > + struct folio *folio = page_folio(ceph_fscrypt_pagecache_page(pages[i])); > > - u64 cur_offset = page_offset(page); > + u64 cur_offset = folio_pos(folio); > /* > * Discontinuity in page range? Ceph can handle that by just passing > * multiple extents in the write op. > @@ -1267,10 +1266,10 @@ static int ceph_writepages_start(struct address_space *mapping, > op_idx++; > } > > - set_page_writeback(page); > + folio_start_writeback(folio); > if (caching) > - ceph_set_page_fscache(page); > - len += thp_size(page); > + ceph_set_page_fscache(pages[i]); > + len += folio_size(folio); > } > ceph_fscache_write_to_cache(inode, offset, len, caching); > > @@ -1280,7 +1279,7 @@ static int ceph_writepages_start(struct address_space *mapping, > /* writepages_finish() clears writeback pages > * according to the data length, so make sure > * data length covers all locked pages */ > - u64 min_len = len + 1 - thp_size(page); > + u64 min_len = len + 1 - folio_size(folio); > len = get_writepages_data_length(inode, pages[i - 1], > offset); > len = max(len, min_len); > @@ -1360,7 +1359,6 @@ static int ceph_writepages_start(struct address_space *mapping, > if (wbc->sync_mode != WB_SYNC_NONE && > start_index == 0 && /* all dirty pages were checked */ > !ceph_wbc.head_snapc) { > - struct page *page; > unsigned i, nr; > index = 0; > while ((index <= end) && > @@ -1369,10 +1367,10 @@ static int ceph_writepages_start(struct address_space 
*mapping, > PAGECACHE_TAG_WRITEBACK, > &fbatch))) { > for (i = 0; i < nr; i++) { > - page = &fbatch.folios[i]->page; > - if (page_snap_context(page) != snapc) > + struct folio *folio = fbatch.folios[i]; > + if (folio->private != snapc) Here, IMO, we should rename 'page_snap_context()' to 'folio_snap_context()' and reuse it instead of reading 'folio->private' directly. As I remember, `page->private` can still be non-NULL in some cases even when the dirty bit is not set? Thanks - Xiubo > continue; > - wait_on_page_writeback(page); > + folio_wait_writeback(folio); > } > folio_batch_release(&fbatch); > cond_resched();
On 8/28/23 09:18, Xiubo Li wrote: > > On 8/26/23 04:12, Matthew Wilcox (Oracle) wrote: >> After we iterate through the locked folios using >> filemap_get_folios_tag(), >> we currently convert back to a page (and then in some circumstaces back >> to a folio again!). Just use a folio throughout and avoid various >> hidden >> calls to compound_head(). Ceph still uses a page array to interact with >> the OSD which should be cleaned up in a subsequent patch. >> >> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> >> --- >> fs/ceph/addr.c | 100 ++++++++++++++++++++++++------------------------- >> 1 file changed, 49 insertions(+), 51 deletions(-) >> >> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c >> index f4863078f7fe..9a0a79833eb0 100644 >> --- a/fs/ceph/addr.c >> +++ b/fs/ceph/addr.c >> @@ -1018,7 +1018,7 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> int num_ops = 0, op_idx; >> unsigned i, nr_folios, max_pages, locked_pages = 0; >> struct page **pages = NULL, **data_pages; >> - struct page *page; >> + struct folio *folio; >> pgoff_t strip_unit_end = 0; >> u64 offset = 0, len = 0; >> bool from_pool = false; >> @@ -1032,22 +1032,22 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> if (!nr_folios && !locked_pages) >> break; >> for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { >> - page = &fbatch.folios[i]->page; >> - dout("? %p idx %lu\n", page, page->index); >> + folio = fbatch.folios[i]; >> + dout("? 
%p idx %lu\n", folio, folio->index); >> if (locked_pages == 0) >> - lock_page(page); /* first page */ >> - else if (!trylock_page(page)) >> + folio_lock(folio); /* first folio */ >> + else if (!folio_trylock(folio)) >> break; >> /* only dirty pages, or our accounting breaks */ >> - if (unlikely(!PageDirty(page)) || >> - unlikely(page->mapping != mapping)) { >> - dout("!dirty or !mapping %p\n", page); >> - unlock_page(page); >> + if (unlikely(!folio_test_dirty(folio)) || >> + unlikely(folio->mapping != mapping)) { >> + dout("!dirty or !mapping %p\n", folio); >> + folio_unlock(folio); >> continue; >> } >> /* only if matching snap context */ >> - pgsnapc = page_snap_context(page); >> + pgsnapc = folio->private; >> if (pgsnapc != snapc) { >> dout("page snapc %p %lld != oldest %p %lld\n", >> pgsnapc, pgsnapc->seq, snapc, snapc->seq); >> @@ -1055,12 +1055,10 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> !ceph_wbc.head_snapc && >> wbc->sync_mode != WB_SYNC_NONE) >> should_loop = true; >> - unlock_page(page); >> + folio_unlock(folio); >> continue; >> } >> - if (page_offset(page) >= ceph_wbc.i_size) { >> - struct folio *folio = page_folio(page); >> - >> + if (folio_pos(folio) >= ceph_wbc.i_size) { >> dout("folio at %lu beyond eof %llu\n", >> folio->index, ceph_wbc.i_size); >> if ((ceph_wbc.size_stable || >> @@ -1071,31 +1069,32 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> folio_unlock(folio); >> continue; >> } >> - if (strip_unit_end && (page->index > strip_unit_end)) { >> - dout("end of strip unit %p\n", page); >> - unlock_page(page); >> + if (strip_unit_end && (folio->index > strip_unit_end)) { >> + dout("end of strip unit %p\n", folio); >> + folio_unlock(folio); >> break; >> } >> - if (PageWriteback(page) || PageFsCache(page)) { >> + if (folio_test_writeback(folio) || >> + folio_test_fscache(folio)) { >> if (wbc->sync_mode == WB_SYNC_NONE) { >> - dout("%p under writeback\n", page); >> - unlock_page(page); >> + 
dout("%p under writeback\n", folio); >> + folio_unlock(folio); >> continue; >> } >> - dout("waiting on writeback %p\n", page); >> - wait_on_page_writeback(page); >> - wait_on_page_fscache(page); >> + dout("waiting on writeback %p\n", folio); >> + folio_wait_writeback(folio); >> + folio_wait_fscache(folio); >> } >> - if (!clear_page_dirty_for_io(page)) { >> - dout("%p !clear_page_dirty_for_io\n", page); >> - unlock_page(page); >> + if (!folio_clear_dirty_for_io(folio)) { >> + dout("%p !folio_clear_dirty_for_io\n", folio); >> + folio_unlock(folio); >> continue; >> } >> /* >> * We have something to write. If this is >> - * the first locked page this time through, >> + * the first locked folio this time through, >> * calculate max possinle write size and >> * allocate a page array >> */ >> @@ -1105,7 +1104,7 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> u32 xlen; >> /* prepare async write request */ >> - offset = (u64)page_offset(page); >> + offset = folio_pos(folio); >> ceph_calc_file_object_mapping(&ci->i_layout, >> offset, wsize, >> &objnum, &objoff, >> @@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> len = xlen; >> num_ops = 1; >> - strip_unit_end = page->index + >> + strip_unit_end = folio->index + >> ((len - 1) >> PAGE_SHIFT); >> BUG_ON(pages); >> @@ -1128,23 +1127,23 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> } >> len = 0; >> - } else if (page->index != >> + } else if (folio->index != >> (offset + len) >> PAGE_SHIFT) { >> if (num_ops >= (from_pool ? 
CEPH_OSD_SLAB_OPS : >> CEPH_OSD_MAX_OPS)) { >> - redirty_page_for_writepage(wbc, page); >> - unlock_page(page); >> + folio_redirty_for_writepage(wbc, folio); >> + folio_unlock(folio); >> break; >> } >> num_ops++; >> - offset = (u64)page_offset(page); >> + offset = (u64)folio_pos(folio); >> len = 0; >> } >> /* note position of first page in fbatch */ >> - dout("%p will write page %p idx %lu\n", >> - inode, page, page->index); >> + dout("%p will write folio %p idx %lu\n", >> + inode, folio, folio->index); >> if (atomic_long_inc_return(&fsc->writeback_count) > >> CONGESTION_ON_THRESH( >> @@ -1153,7 +1152,7 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> if (IS_ENCRYPTED(inode)) { >> pages[locked_pages] = >> - fscrypt_encrypt_pagecache_blocks(page, >> + fscrypt_encrypt_pagecache_blocks(&folio->page, >> PAGE_SIZE, 0, >> locked_pages ? GFP_NOWAIT : GFP_NOFS); >> if (IS_ERR(pages[locked_pages])) { >> @@ -1163,17 +1162,17 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> /* better not fail on first page! */ >> BUG_ON(locked_pages == 0); >> pages[locked_pages] = NULL; >> - redirty_page_for_writepage(wbc, page); >> - unlock_page(page); >> + folio_redirty_for_writepage(wbc, folio); >> + folio_unlock(folio); >> break; >> } >> ++locked_pages; >> } else { >> - pages[locked_pages++] = page; >> + pages[locked_pages++] = &folio->page; >> } >> fbatch.folios[i] = NULL; >> - len += thp_size(page); >> + len += folio_size(folio); >> } >> /* did we get anything? 
*/ >> @@ -1222,7 +1221,7 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> BUG_ON(IS_ERR(req)); >> } >> BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - >> 1]) + >> - thp_size(pages[locked_pages - 1]) - offset); >> + folio_size(folio) - offset); >> if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { >> rc = -EIO; >> @@ -1236,9 +1235,9 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> data_pages = pages; >> op_idx = 0; >> for (i = 0; i < locked_pages; i++) { >> - struct page *page = ceph_fscrypt_pagecache_page(pages[i]); >> + struct folio *folio = >> page_folio(ceph_fscrypt_pagecache_page(pages[i])); >> - u64 cur_offset = page_offset(page); >> + u64 cur_offset = folio_pos(folio); >> /* >> * Discontinuity in page range? Ceph can handle that by >> just passing >> * multiple extents in the write op. >> @@ -1267,10 +1266,10 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> op_idx++; >> } >> - set_page_writeback(page); >> + folio_start_writeback(folio); >> if (caching) >> - ceph_set_page_fscache(page); >> - len += thp_size(page); >> + ceph_set_page_fscache(pages[i]); >> + len += folio_size(folio); >> } >> ceph_fscache_write_to_cache(inode, offset, len, caching); >> @@ -1280,7 +1279,7 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> /* writepages_finish() clears writeback pages >> * according to the data length, so make sure >> * data length covers all locked pages */ >> - u64 min_len = len + 1 - thp_size(page); >> + u64 min_len = len + 1 - folio_size(folio); >> len = get_writepages_data_length(inode, pages[i - 1], >> offset); >> len = max(len, min_len); >> @@ -1360,7 +1359,6 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> if (wbc->sync_mode != WB_SYNC_NONE && >> start_index == 0 && /* all dirty pages were checked */ >> !ceph_wbc.head_snapc) { >> - struct page *page; >> unsigned i, nr; >> index = 0; >> while ((index <= end) && >> @@ -1369,10 
+1367,10 @@ static int ceph_writepages_start(struct >> address_space *mapping, >> PAGECACHE_TAG_WRITEBACK, >> &fbatch))) { >> for (i = 0; i < nr; i++) { >> - page = &fbatch.folios[i]->page; >> - if (page_snap_context(page) != snapc) >> + struct folio *folio = fbatch.folios[i]; >> + if (folio->private != snapc) > > Here IMO we should reuse and rename 'page_snap_context()' --> > 'folio_snap_context()' instead of 'folio->private' directly. As I > remembered if the dirty bit is not set the `page->private` still could > be non-NULL in some cases ? > Hi Willy, Could you check the above comment? As I remember, there was a bug related to this that we tried to fix with Jeff last year or earlier. Thanks - Xiubo > Thanks > > - Xiubo > > >> continue; >> - wait_on_page_writeback(page); >> + folio_wait_writeback(folio); >> } >> folio_batch_release(&fbatch); >> cond_resched();
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f4863078f7fe..9a0a79833eb0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1018,7 +1018,7 @@ static int ceph_writepages_start(struct address_space *mapping, int num_ops = 0, op_idx; unsigned i, nr_folios, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; - struct page *page; + struct folio *folio; pgoff_t strip_unit_end = 0; u64 offset = 0, len = 0; bool from_pool = false; @@ -1032,22 +1032,22 @@ static int ceph_writepages_start(struct address_space *mapping, if (!nr_folios && !locked_pages) break; for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { - page = &fbatch.folios[i]->page; - dout("? %p idx %lu\n", page, page->index); + folio = fbatch.folios[i]; + dout("? %p idx %lu\n", folio, folio->index); if (locked_pages == 0) - lock_page(page); /* first page */ - else if (!trylock_page(page)) + folio_lock(folio); /* first folio */ + else if (!folio_trylock(folio)) break; /* only dirty pages, or our accounting breaks */ - if (unlikely(!PageDirty(page)) || - unlikely(page->mapping != mapping)) { - dout("!dirty or !mapping %p\n", page); - unlock_page(page); + if (unlikely(!folio_test_dirty(folio)) || + unlikely(folio->mapping != mapping)) { + dout("!dirty or !mapping %p\n", folio); + folio_unlock(folio); continue; } /* only if matching snap context */ - pgsnapc = page_snap_context(page); + pgsnapc = folio->private; if (pgsnapc != snapc) { dout("page snapc %p %lld != oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); @@ -1055,12 +1055,10 @@ static int ceph_writepages_start(struct address_space *mapping, !ceph_wbc.head_snapc && wbc->sync_mode != WB_SYNC_NONE) should_loop = true; - unlock_page(page); + folio_unlock(folio); continue; } - if (page_offset(page) >= ceph_wbc.i_size) { - struct folio *folio = page_folio(page); - + if (folio_pos(folio) >= ceph_wbc.i_size) { dout("folio at %lu beyond eof %llu\n", folio->index, ceph_wbc.i_size); if ((ceph_wbc.size_stable || @@ -1071,31 
+1069,32 @@ static int ceph_writepages_start(struct address_space *mapping, folio_unlock(folio); continue; } - if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); - unlock_page(page); + if (strip_unit_end && (folio->index > strip_unit_end)) { + dout("end of strip unit %p\n", folio); + folio_unlock(folio); break; } - if (PageWriteback(page) || PageFsCache(page)) { + if (folio_test_writeback(folio) || + folio_test_fscache(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) { - dout("%p under writeback\n", page); - unlock_page(page); + dout("%p under writeback\n", folio); + folio_unlock(folio); continue; } - dout("waiting on writeback %p\n", page); - wait_on_page_writeback(page); - wait_on_page_fscache(page); + dout("waiting on writeback %p\n", folio); + folio_wait_writeback(folio); + folio_wait_fscache(folio); } - if (!clear_page_dirty_for_io(page)) { - dout("%p !clear_page_dirty_for_io\n", page); - unlock_page(page); + if (!folio_clear_dirty_for_io(folio)) { + dout("%p !folio_clear_dirty_for_io\n", folio); + folio_unlock(folio); continue; } /* * We have something to write. 
If this is - * the first locked page this time through, + * the first locked folio this time through, * calculate max possinle write size and * allocate a page array */ @@ -1105,7 +1104,7 @@ static int ceph_writepages_start(struct address_space *mapping, u32 xlen; /* prepare async write request */ - offset = (u64)page_offset(page); + offset = folio_pos(folio); ceph_calc_file_object_mapping(&ci->i_layout, offset, wsize, &objnum, &objoff, @@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct address_space *mapping, len = xlen; num_ops = 1; - strip_unit_end = page->index + + strip_unit_end = folio->index + ((len - 1) >> PAGE_SHIFT); BUG_ON(pages); @@ -1128,23 +1127,23 @@ static int ceph_writepages_start(struct address_space *mapping, } len = 0; - } else if (page->index != + } else if (folio->index != (offset + len) >> PAGE_SHIFT) { if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS : CEPH_OSD_MAX_OPS)) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); break; } num_ops++; - offset = (u64)page_offset(page); + offset = (u64)folio_pos(folio); len = 0; } /* note position of first page in fbatch */ - dout("%p will write page %p idx %lu\n", - inode, page, page->index); + dout("%p will write folio %p idx %lu\n", + inode, folio, folio->index); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( @@ -1153,7 +1152,7 @@ static int ceph_writepages_start(struct address_space *mapping, if (IS_ENCRYPTED(inode)) { pages[locked_pages] = - fscrypt_encrypt_pagecache_blocks(page, + fscrypt_encrypt_pagecache_blocks(&folio->page, PAGE_SIZE, 0, locked_pages ? GFP_NOWAIT : GFP_NOFS); if (IS_ERR(pages[locked_pages])) { @@ -1163,17 +1162,17 @@ static int ceph_writepages_start(struct address_space *mapping, /* better not fail on first page! 
*/ BUG_ON(locked_pages == 0); pages[locked_pages] = NULL; - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); break; } ++locked_pages; } else { - pages[locked_pages++] = page; + pages[locked_pages++] = &folio->page; } fbatch.folios[i] = NULL; - len += thp_size(page); + len += folio_size(folio); } /* did we get anything? */ @@ -1222,7 +1221,7 @@ static int ceph_writepages_start(struct address_space *mapping, BUG_ON(IS_ERR(req)); } BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) + - thp_size(pages[locked_pages - 1]) - offset); + folio_size(folio) - offset); if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { rc = -EIO; @@ -1236,9 +1235,9 @@ static int ceph_writepages_start(struct address_space *mapping, data_pages = pages; op_idx = 0; for (i = 0; i < locked_pages; i++) { - struct page *page = ceph_fscrypt_pagecache_page(pages[i]); + struct folio *folio = page_folio(ceph_fscrypt_pagecache_page(pages[i])); - u64 cur_offset = page_offset(page); + u64 cur_offset = folio_pos(folio); /* * Discontinuity in page range? Ceph can handle that by just passing * multiple extents in the write op. 
@@ -1267,10 +1266,10 @@ static int ceph_writepages_start(struct address_space *mapping, op_idx++; } - set_page_writeback(page); + folio_start_writeback(folio); if (caching) - ceph_set_page_fscache(page); - len += thp_size(page); + ceph_set_page_fscache(pages[i]); + len += folio_size(folio); } ceph_fscache_write_to_cache(inode, offset, len, caching); @@ -1280,7 +1279,7 @@ static int ceph_writepages_start(struct address_space *mapping, /* writepages_finish() clears writeback pages * according to the data length, so make sure * data length covers all locked pages */ - u64 min_len = len + 1 - thp_size(page); + u64 min_len = len + 1 - folio_size(folio); len = get_writepages_data_length(inode, pages[i - 1], offset); len = max(len, min_len); @@ -1360,7 +1359,6 @@ static int ceph_writepages_start(struct address_space *mapping, if (wbc->sync_mode != WB_SYNC_NONE && start_index == 0 && /* all dirty pages were checked */ !ceph_wbc.head_snapc) { - struct page *page; unsigned i, nr; index = 0; while ((index <= end) && @@ -1369,10 +1367,10 @@ static int ceph_writepages_start(struct address_space *mapping, PAGECACHE_TAG_WRITEBACK, &fbatch))) { for (i = 0; i < nr; i++) { - page = &fbatch.folios[i]->page; - if (page_snap_context(page) != snapc) + struct folio *folio = fbatch.folios[i]; + if (folio->private != snapc) continue; - wait_on_page_writeback(page); + folio_wait_writeback(folio); } folio_batch_release(&fbatch); cond_resched();
After we iterate through the locked folios using filemap_get_folios_tag(), we currently convert back to a page (and then in some circumstances back to a folio again!). Just use a folio throughout and avoid various hidden calls to compound_head(). Ceph still uses a page array to interact with the OSD which should be cleaned up in a subsequent patch. Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> --- fs/ceph/addr.c | 100 ++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 51 deletions(-)