Message ID | 20200323202259.13363-25-willy@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Change readahead API | expand |
On Mon, Mar 23, 2020 at 9:23 PM Matthew Wilcox <willy@infradead.org> wrote: > > From: "Matthew Wilcox (Oracle)" <willy@infradead.org> > > Use the new readahead operation in fuse. Switching away from the > read_cache_pages() helper gets rid of an implicit call to put_page(), > so we can get rid of the get_page() call in fuse_readpages_fill(). > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > Reviewed-by: Dave Chinner <dchinner@redhat.com> > Reviewed-by: William Kucharski <william.kucharski@oracle.com> > --- > fs/fuse/file.c | 46 +++++++++++++++++++--------------------------- > 1 file changed, 19 insertions(+), 27 deletions(-) > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index 9d67b830fb7a..5749505bcff6 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -923,9 +923,8 @@ struct fuse_fill_data { > unsigned int max_pages; > }; > > -static int fuse_readpages_fill(void *_data, struct page *page) > +static int fuse_readpages_fill(struct fuse_fill_data *data, struct page *page) > { > - struct fuse_fill_data *data = _data; > struct fuse_io_args *ia = data->ia; > struct fuse_args_pages *ap = &ia->ap; > struct inode *inode = data->inode; > @@ -941,10 +940,8 @@ static int fuse_readpages_fill(void *_data, struct page *page) > fc->max_pages); > fuse_send_readpages(ia, data->file); > data->ia = ia = fuse_io_alloc(NULL, data->max_pages); > - if (!ia) { > - unlock_page(page); > + if (!ia) > return -ENOMEM; > - } > ap = &ia->ap; > } > > @@ -954,7 +951,6 @@ static int fuse_readpages_fill(void *_data, struct page *page) > return -EIO; > } > > - get_page(page); > ap->pages[ap->num_pages] = page; > ap->descs[ap->num_pages].length = PAGE_SIZE; > ap->num_pages++; > @@ -962,37 +958,33 @@ static int fuse_readpages_fill(void *_data, struct page *page) > return 0; > } > > -static int fuse_readpages(struct file *file, struct address_space *mapping, > - struct list_head *pages, unsigned nr_pages) > +static void fuse_readahead(struct readahead_control *rac) > { > - 
struct inode *inode = mapping->host; > + struct inode *inode = rac->mapping->host; > struct fuse_conn *fc = get_fuse_conn(inode); > struct fuse_fill_data data; > - int err; > + struct page *page; > > - err = -EIO; > if (is_bad_inode(inode)) > - goto out; > + return; > > - data.file = file; > + data.file = rac->file; > data.inode = inode; > - data.nr_pages = nr_pages; > - data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages); > -; > + data.nr_pages = readahead_count(rac); > + data.max_pages = min_t(unsigned int, data.nr_pages, fc->max_pages); > data.ia = fuse_io_alloc(NULL, data.max_pages); > - err = -ENOMEM; > if (!data.ia) > - goto out; > + return; > > - err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); > - if (!err) { > - if (data.ia->ap.num_pages) > - fuse_send_readpages(data.ia, file); > - else > - fuse_io_free(data.ia); > + while ((page = readahead_page(rac))) { > + if (fuse_readpages_fill(&data, page) != 0) Shouldn't this unlock + put page on error? Otherwise looks good. Thanks, Miklos
On Wed, Mar 25, 2020 at 10:42:56AM +0100, Miklos Szeredi wrote: > > + while ((page = readahead_page(rac))) { > > + if (fuse_readpages_fill(&data, page) != 0) > > Shouldn't this unlock + put page on error? We're certainly inconsistent between the two error exits from fuse_readpages_fill(). But I think we can simplify the whole thing ... how does this look to you? diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5749505bcff6..57ea9a364e62 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -915,76 +915,32 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) fuse_readpages_end(fc, &ap->args, err); } -struct fuse_fill_data { - struct fuse_io_args *ia; - struct file *file; - struct inode *inode; - unsigned int nr_pages; - unsigned int max_pages; -}; - -static int fuse_readpages_fill(struct fuse_fill_data *data, struct page *page) -{ - struct fuse_io_args *ia = data->ia; - struct fuse_args_pages *ap = &ia->ap; - struct inode *inode = data->inode; - struct fuse_conn *fc = get_fuse_conn(inode); - - fuse_wait_on_page_writeback(inode, page->index); - - if (ap->num_pages && - (ap->num_pages == fc->max_pages || - (ap->num_pages + 1) * PAGE_SIZE > fc->max_read || - ap->pages[ap->num_pages - 1]->index + 1 != page->index)) { - data->max_pages = min_t(unsigned int, data->nr_pages, - fc->max_pages); - fuse_send_readpages(ia, data->file); - data->ia = ia = fuse_io_alloc(NULL, data->max_pages); - if (!ia) - return -ENOMEM; - ap = &ia->ap; - } - - if (WARN_ON(ap->num_pages >= data->max_pages)) { - unlock_page(page); - fuse_io_free(ia); - return -EIO; - } - - ap->pages[ap->num_pages] = page; - ap->descs[ap->num_pages].length = PAGE_SIZE; - ap->num_pages++; - data->nr_pages--; - return 0; -} - static void fuse_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_fill_data data; - struct page *page; if (is_bad_inode(inode)) return; - data.file = rac->file; - data.inode = 
inode; - data.nr_pages = readahead_count(rac); - data.max_pages = min_t(unsigned int, data.nr_pages, fc->max_pages); - data.ia = fuse_io_alloc(NULL, data.max_pages); - if (!data.ia) - return; + while (readahead_count(rac)) { + struct fuse_io_args *ia; + struct fuse_args_pages *ap; + unsigned int i, nr_pages; - while ((page = readahead_page(rac))) { - if (fuse_readpages_fill(&data, page) != 0) + nr_pages = min(readahead_count(rac), fc->max_pages); + ia = fuse_io_alloc(NULL, nr_pages); + if (!ia) return; + ap = &ia->ap; + __readahead_batch(rac, ap->pages, nr_pages); + for (i = 0; i < nr_pages; i++) { + fuse_wait_on_page_writeback(inode, ap->pages[i]->index); + ap->descs[i].length = PAGE_SIZE; + } + ap->num_pages = nr_pages; + fuse_send_readpages(ia, rac->file); } - - if (data.ia->ap.num_pages) - fuse_send_readpages(data.ia, rac->file); - else - fuse_io_free(data.ia); } static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
On Wed, Mar 25, 2020 at 1:02 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, Mar 25, 2020 at 10:42:56AM +0100, Miklos Szeredi wrote: > > > + while ((page = readahead_page(rac))) { > > > + if (fuse_readpages_fill(&data, page) != 0) > > > > Shouldn't this unlock + put page on error? > > We're certainly inconsistent between the two error exits from > fuse_readpages_fill(). But I think we can simplify the whole thing > ... how does this look to you? Nice, overall. > > - while ((page = readahead_page(rac))) { > - if (fuse_readpages_fill(&data, page) != 0) > + nr_pages = min(readahead_count(rac), fc->max_pages); Missing fc->max_read clamp. > + ia = fuse_io_alloc(NULL, nr_pages); > + if (!ia) > return; > + ap = &ia->ap; > + __readahead_batch(rac, ap->pages, nr_pages); nr_pages = __readahead_batch(...)? This will give consecutive pages, right? Thanks, Miklos
On Wed, Mar 25, 2020 at 03:43:02PM +0100, Miklos Szeredi wrote: > > > > - while ((page = readahead_page(rac))) { > > - if (fuse_readpages_fill(&data, page) != 0) > > + nr_pages = min(readahead_count(rac), fc->max_pages); > > Missing fc->max_read clamp. Yeah, I realised that. I ended up doing ... + unsigned int i, max_pages, nr_pages = 0; ... + max_pages = min(fc->max_pages, fc->max_read / PAGE_SIZE); > > + ia = fuse_io_alloc(NULL, nr_pages); > > + if (!ia) > > return; > > + ap = &ia->ap; > > + __readahead_batch(rac, ap->pages, nr_pages); > > nr_pages = __readahead_batch(...)? That's the other bug ... this was designed for btrfs which has a fixed-size buffer. But you want to dynamically allocate fuse_io_args(), so we need to figure out the number of pages beforehand, which is a little awkward. I've settled on this for the moment: for (;;) { struct fuse_io_args *ia; struct fuse_args_pages *ap; nr_pages = readahead_count(rac) - nr_pages; if (nr_pages > max_pages) nr_pages = max_pages; if (nr_pages == 0) break; ia = fuse_io_alloc(NULL, nr_pages); if (!ia) return; ap = &ia->ap; __readahead_batch(rac, ap->pages, nr_pages); for (i = 0; i < nr_pages; i++) { fuse_wait_on_page_writeback(inode, readahead_index(rac) + i); ap->descs[i].length = PAGE_SIZE; } ap->num_pages = nr_pages; fuse_send_readpages(ia, rac->file); } but I'm not entirely happy with that either. Pondering better options. > This will give consecutive pages, right? readpages() was already being called with consecutive pages. Several filesystems had code to cope with the pages being non-consecutive, but that wasn't how the core code worked; if there was a discontiguity it would send off the pages that were consecutive and start a new batch. __readahead_batch() can't return fewer than nr_pages, so you don't need to check for that.
On Wed, Mar 25, 2020 at 4:32 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, Mar 25, 2020 at 03:43:02PM +0100, Miklos Szeredi wrote: > > > > > > - while ((page = readahead_page(rac))) { > > > - if (fuse_readpages_fill(&data, page) != 0) > > > + nr_pages = min(readahead_count(rac), fc->max_pages); > > > > Missing fc->max_read clamp. > > Yeah, I realised that. I ended up doing ... > > + unsigned int i, max_pages, nr_pages = 0; > ... > + max_pages = min(fc->max_pages, fc->max_read / PAGE_SIZE); > > > > + ia = fuse_io_alloc(NULL, nr_pages); > > > + if (!ia) > > > return; > > > + ap = &ia->ap; > > > + __readahead_batch(rac, ap->pages, nr_pages); > > > > nr_pages = __readahead_batch(...)? > > That's the other bug ... this was designed for btrfs which has a fixed-size > buffer. But you want to dynamically allocate fuse_io_args(), so we need to > figure out the number of pages beforehand, which is a little awkward. I've > settled on this for the moment: > > for (;;) { > struct fuse_io_args *ia; > struct fuse_args_pages *ap; > > nr_pages = readahead_count(rac) - nr_pages; > if (nr_pages > max_pages) > nr_pages = max_pages; > if (nr_pages == 0) > break; > ia = fuse_io_alloc(NULL, nr_pages); > if (!ia) > return; > ap = &ia->ap; > __readahead_batch(rac, ap->pages, nr_pages); > for (i = 0; i < nr_pages; i++) { > fuse_wait_on_page_writeback(inode, > readahead_index(rac) + i); > ap->descs[i].length = PAGE_SIZE; > } > ap->num_pages = nr_pages; > fuse_send_readpages(ia, rac->file); > } > > but I'm not entirely happy with that either. Pondering better options. I think that's fine. Note how the original code possibly over-allocates the page array, because it doesn't know the batch size beforehand. So this is already better. > > > This will give consecutive pages, right? > > readpages() was already being called with consecutive pages. 
Several > filesystems had code to cope with the pages being non-consecutive, but > that wasn't how the core code worked; if there was a discontiguity it > would send off the pages that were consecutive and start a new batch. > > __readahead_batch() can't return fewer than nr_pages, so you don't need > to check for that. That's far from obvious. I'd put a WARN_ON at least to document the fact. Thanks, Miklos
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9d67b830fb7a..5749505bcff6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -923,9 +923,8 @@ struct fuse_fill_data { unsigned int max_pages; }; -static int fuse_readpages_fill(void *_data, struct page *page) +static int fuse_readpages_fill(struct fuse_fill_data *data, struct page *page) { - struct fuse_fill_data *data = _data; struct fuse_io_args *ia = data->ia; struct fuse_args_pages *ap = &ia->ap; struct inode *inode = data->inode; @@ -941,10 +940,8 @@ static int fuse_readpages_fill(void *_data, struct page *page) fc->max_pages); fuse_send_readpages(ia, data->file); data->ia = ia = fuse_io_alloc(NULL, data->max_pages); - if (!ia) { - unlock_page(page); + if (!ia) return -ENOMEM; - } ap = &ia->ap; } @@ -954,7 +951,6 @@ static int fuse_readpages_fill(void *_data, struct page *page) return -EIO; } - get_page(page); ap->pages[ap->num_pages] = page; ap->descs[ap->num_pages].length = PAGE_SIZE; ap->num_pages++; @@ -962,37 +958,33 @@ static int fuse_readpages_fill(void *_data, struct page *page) return 0; } -static int fuse_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void fuse_readahead(struct readahead_control *rac) { - struct inode *inode = mapping->host; + struct inode *inode = rac->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_fill_data data; - int err; + struct page *page; - err = -EIO; if (is_bad_inode(inode)) - goto out; + return; - data.file = file; + data.file = rac->file; data.inode = inode; - data.nr_pages = nr_pages; - data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages); -; + data.nr_pages = readahead_count(rac); + data.max_pages = min_t(unsigned int, data.nr_pages, fc->max_pages); data.ia = fuse_io_alloc(NULL, data.max_pages); - err = -ENOMEM; if (!data.ia) - goto out; + return; - err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); - if (!err) { - if (data.ia->ap.num_pages) - 
fuse_send_readpages(data.ia, file); - else - fuse_io_free(data.ia); + while ((page = readahead_page(rac))) { + if (fuse_readpages_fill(&data, page) != 0) + return; } -out: - return err; + + if (data.ia->ap.num_pages) + fuse_send_readpages(data.ia, rac->file); + else + fuse_io_free(data.ia); } static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -3373,10 +3365,10 @@ static const struct file_operations fuse_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, + .readahead = fuse_readahead, .writepage = fuse_writepage, .writepages = fuse_writepages, .launder_page = fuse_launder_page, - .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, .direct_IO = fuse_direct_IO,