Message ID | 20200115023843.31325-7-willy@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Replacing the readpages a_op | expand |
On Tue, Jan 14, 2020 at 06:38:40PM -0800, Matthew Wilcox wrote: > static loff_t > +iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, > void *data, struct iomap *iomap, struct iomap *srcmap) > { > struct iomap_readpage_ctx *ctx = data; > @@ -410,10 +381,8 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, > ctx->cur_page = NULL; > } > if (!ctx->cur_page) { > - ctx->cur_page = iomap_next_page(inode, ctx->pages, > - pos, length, &done); > - if (!ctx->cur_page) > - break; > + ctx->cur_page = readahead_page(inode->i_mapping, > + pos / PAGE_SIZE); Don't we at least need a sanity check for a NULL cur_page here? Also the readahead_page version in your previous patch seems to expect a byte offset, so the division above would not be required. (and should probably be replaced with a right shift anyway no matter where it ends up) > +unsigned > +iomap_readahead(struct address_space *mapping, pgoff_t start, > unsigned nr_pages, const struct iomap_ops *ops) > { > struct iomap_readpage_ctx ctx = { > - .pages = pages, > .is_readahead = true, > }; > - loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); > - loff_t last = page_offset(list_entry(pages->next, struct page, lru)); > - loff_t length = last - pos + PAGE_SIZE, ret = 0; > + loff_t pos = start * PAGE_SIZE; > + loff_t length = nr_pages * PAGE_SIZE; Any good reason not to pass byte offsets for start and length? > + return length / PAGE_SIZE; Same for the return value? For the file systems that would usually be a more natural interface than a page index and number of pages.
On Tue, Jan 14, 2020 at 11:16:28PM -0800, Christoph Hellwig wrote: > On Tue, Jan 14, 2020 at 06:38:40PM -0800, Matthew Wilcox wrote: > > static loff_t > > +iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, > > void *data, struct iomap *iomap, struct iomap *srcmap) > > { > > struct iomap_readpage_ctx *ctx = data; > > @@ -410,10 +381,8 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, > > ctx->cur_page = NULL; > > } > > if (!ctx->cur_page) { > > - ctx->cur_page = iomap_next_page(inode, ctx->pages, > > - pos, length, &done); > > - if (!ctx->cur_page) > > - break; > > + ctx->cur_page = readahead_page(inode->i_mapping, > > + pos / PAGE_SIZE); > > Don't we at least need a sanity check for a NULL cur_page here? I don't think so. The caller has already put the locked page into the page cache at that index. If the page has gone away, that's a bug, and I don't think BUG_ON is all that much better than a NULL pointer dereference. Indeed, readahead_page() checks PageLocked, so it can't return NULL. > Also the readahead_page version in your previous patch seems to expect > a byte offset, so the division above would not be required. Oops. I had intended to make readahead_pages() look like this: struct page *readahead_page(struct address_space *mapping, pgoff_t index) { struct page *page = xa_load(&mapping->i_pages, index); VM_BUG_ON_PAGE(!PageLocked(page), page); return page; } If only our tools could warn about these kinds of mistakes. > (and should > probably be replaced with a right shift anyway no matter where it ends > up) If the compiler can't tell that x / 4096 and x >> 12 are precisely the same and choose the more efficient of the two, we have big problems. 
> > +unsigned > > +iomap_readahead(struct address_space *mapping, pgoff_t start, > > unsigned nr_pages, const struct iomap_ops *ops) > > { > > struct iomap_readpage_ctx ctx = { > > - .pages = pages, > > .is_readahead = true, > > }; > > - loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); > > - loff_t last = page_offset(list_entry(pages->next, struct page, lru)); > > - loff_t length = last - pos + PAGE_SIZE, ret = 0; > > + loff_t pos = start * PAGE_SIZE; > > + loff_t length = nr_pages * PAGE_SIZE; > > Any good reason not to pass byte offsets for start and length? > > > + return length / PAGE_SIZE; > > Same for the return value? > > For the file systems that would usually be a more natural interface than > a page index and number of pages. That seems to depend on the filesystem. iomap definitely would be happier with loff_t, but cifs prefers pgoff_t. I should probably survey a few more filesystems and see if there's a strong lean in one direction or the other.
On Tue, Jan 14, 2020 at 11:42:43PM -0800, Matthew Wilcox wrote: > On Tue, Jan 14, 2020 at 11:16:28PM -0800, Christoph Hellwig wrote: > > On Tue, Jan 14, 2020 at 06:38:40PM -0800, Matthew Wilcox wrote: > > > +iomap_readahead(struct address_space *mapping, pgoff_t start, > > > unsigned nr_pages, const struct iomap_ops *ops) > > > { > > > struct iomap_readpage_ctx ctx = { > > > - .pages = pages, > > > .is_readahead = true, > > > }; > > > - loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); > > > - loff_t last = page_offset(list_entry(pages->next, struct page, lru)); > > > - loff_t length = last - pos + PAGE_SIZE, ret = 0; > > > + loff_t pos = start * PAGE_SIZE; > > > + loff_t length = nr_pages * PAGE_SIZE; > > > > Any good reason not to pass byte offsets for start and length? > > > > > + return length / PAGE_SIZE; > > > > Same for the return value? > > > > For the file systems that would usually be a more natural interface than > > a page index and number of pages. > > That seems to depend on the filesystem. iomap definitely would be happier > with loff_t, but cifs prefers pgoff_t. I should probably survey a few > more filesystems and see if there's a strong lean in one direction or > the other. I've converted all the filesystems now except for those that use fscache. http://git.infradead.org/users/willy/linux-dax.git/shortlog/refs/heads/readahead iomap is the only one for which an loff_t makes sense as an argument. fscache will also prefer page index & count once Dave's conversion series lands: https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache-iter&id=ae317744dfb9732123e554467a9f6d93733e8a5b I'll prep a serious conversion series for 5.6 soon (skipping cifs, but converting all the non-fscache filesystems).
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 828444e14d09..a835b99f281f 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -216,7 +216,6 @@ struct iomap_readpage_ctx { bool cur_page_in_bio; bool is_readahead; struct bio *bio; - struct list_head *pages; }; static void @@ -367,36 +366,8 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) } EXPORT_SYMBOL_GPL(iomap_readpage); -static struct page * -iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos, - loff_t length, loff_t *done) -{ - while (!list_empty(pages)) { - struct page *page = lru_to_page(pages); - - if (page_offset(page) >= (u64)pos + length) - break; - - list_del(&page->lru); - if (!add_to_page_cache_lru(page, inode->i_mapping, page->index, - GFP_NOFS)) - return page; - - /* - * If we already have a page in the page cache at index we are - * done. Upper layers don't care if it is uptodate after the - * readpages call itself as every page gets checked again once - * actually needed. 
- */ - *done += PAGE_SIZE; - put_page(page); - } - - return NULL; -} - static loff_t -iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, +iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap, struct iomap *srcmap) { struct iomap_readpage_ctx *ctx = data; @@ -410,10 +381,8 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, ctx->cur_page = NULL; } if (!ctx->cur_page) { - ctx->cur_page = iomap_next_page(inode, ctx->pages, - pos, length, &done); - if (!ctx->cur_page) - break; + ctx->cur_page = readahead_page(inode->i_mapping, + pos / PAGE_SIZE); ctx->cur_page_in_bio = false; } ret = iomap_readpage_actor(inode, pos + done, length - done, @@ -423,48 +392,37 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, return done; } -int -iomap_readpages(struct address_space *mapping, struct list_head *pages, +unsigned +iomap_readahead(struct address_space *mapping, pgoff_t start, unsigned nr_pages, const struct iomap_ops *ops) { struct iomap_readpage_ctx ctx = { - .pages = pages, .is_readahead = true, }; - loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); - loff_t last = page_offset(list_entry(pages->next, struct page, lru)); - loff_t length = last - pos + PAGE_SIZE, ret = 0; + loff_t pos = start * PAGE_SIZE; + loff_t length = nr_pages * PAGE_SIZE; - trace_iomap_readpages(mapping->host, nr_pages); + trace_iomap_readahead(mapping->host, nr_pages); while (length > 0) { - ret = iomap_apply(mapping->host, pos, length, 0, ops, - &ctx, iomap_readpages_actor); + loff_t ret = iomap_apply(mapping->host, pos, length, 0, ops, + &ctx, iomap_readahead_actor); if (ret <= 0) { WARN_ON_ONCE(ret == 0); - goto done; + break; } pos += ret; length -= ret; } - ret = 0; -done: + if (ctx.bio) submit_bio(ctx.bio); - if (ctx.cur_page) { - if (!ctx.cur_page_in_bio) - unlock_page(ctx.cur_page); + if (ctx.cur_page && ctx.cur_page_in_bio) put_page(ctx.cur_page); - } - /* - * Check 
that we didn't lose a page due to the arcance calling - * conventions.. - */ - WARN_ON_ONCE(!ret && !list_empty(ctx.pages)); - return ret; + return length / PAGE_SIZE; } -EXPORT_SYMBOL_GPL(iomap_readpages); +EXPORT_SYMBOL_GPL(iomap_readahead); /* * iomap_is_partially_uptodate checks whether blocks within a page are diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 6dc227b8c47e..d6ba705f938a 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -39,7 +39,7 @@ DEFINE_EVENT(iomap_readpage_class, name, \ TP_PROTO(struct inode *inode, int nr_pages), \ TP_ARGS(inode, nr_pages)) DEFINE_READPAGE_EVENT(iomap_readpage); -DEFINE_READPAGE_EVENT(iomap_readpages); +DEFINE_READPAGE_EVENT(iomap_readahead); DECLARE_EVENT_CLASS(iomap_page_class, TP_PROTO(struct inode *inode, struct page *page, unsigned long off, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3a688eb5c5ae..4d9da34e759b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -621,14 +621,14 @@ xfs_vm_readpage( return iomap_readpage(page, &xfs_read_iomap_ops); } -STATIC int -xfs_vm_readpages( +STATIC unsigned +xfs_vm_readahead( struct file *unused, struct address_space *mapping, - struct list_head *pages, + pgoff_t start, unsigned nr_pages) { - return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops); + return iomap_readahead(mapping, start, nr_pages, &xfs_read_iomap_ops); } static int @@ -644,7 +644,7 @@ xfs_iomap_swapfile_activate( const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, - .readpages = xfs_vm_readpages, + .readahead = xfs_vm_readahead, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, .set_page_dirty = iomap_set_page_dirty, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 8b09463dae0d..81c6067e9b61 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -155,7 +155,7 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, ssize_t iomap_file_buffered_write(struct kiocb 
*iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); -int iomap_readpages(struct address_space *mapping, struct list_head *pages, +unsigned iomap_readahead(struct address_space *, pgoff_t start, unsigned nr_pages, const struct iomap_ops *ops); int iomap_set_page_dirty(struct page *page); int iomap_is_partially_uptodate(struct page *page, unsigned long from,