diff mbox series

[01/11] fuse: convert readahead to use folios

Message ID aa88eb029f768dddef5c7ef94bb1fde007b4bee0.1724791233.git.josef@toxicpanda.com (mailing list archive)
State New
Headers show
Series fuse: convert to using folios and iomap | expand

Commit Message

Josef Bacik Aug. 27, 2024, 8:45 p.m. UTC
Currently we're using the __readahead_batch() helper which populates our
fuse_args_pages->pages array with pages.  Convert this to use the newer
folio-based pattern, which is to call readahead_folio() to get the next
folio in the readahead batch.  I've updated the code to use things like
folio_size() and to take into account larger folio sizes, but this is
purely to make that eventual work easier to do; we currently will not
get large folios, so this is more future-proofing than actual support.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/fuse/file.c | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

Comments

Matthew Wilcox Aug. 27, 2024, 9:46 p.m. UTC | #1
On Tue, Aug 27, 2024 at 04:45:14PM -0400, Josef Bacik wrote:
>  
> +/*
> + * Wait for page writeback in the range to be completed.  This will work for
> + * folio_size() > PAGE_SIZE, even tho we don't currently allow that.
> + */
> +static void fuse_wait_on_folio_writeback(struct inode *inode,
> +					 struct folio *folio)
> +{
> +	for (pgoff_t index = folio_index(folio);
> +	     index < folio_next_index(folio); index++)
> +		fuse_wait_on_page_writeback(inode, index);
> +}

Would it be better to write this as:

	struct fuse_inode *fi = get_fuse_inode(inode);
	pgoff_t last = folio_next_index(folio) - 1;

	wait_event(fi->page_waitq, !fuse_range_is_writeback(inode,
				folio->index, last));

> @@ -1015,13 +1036,14 @@ static void fuse_readahead(struct readahead_control *rac)
>  		if (!ia)
>  			return;
>  		ap = &ia->ap;
> -		nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
> -		for (i = 0; i < nr_pages; i++) {
> -			fuse_wait_on_page_writeback(inode,
> -						    readahead_index(rac) + i);
> -			ap->descs[i].length = PAGE_SIZE;
> +
> +		while (nr_folios < nr_pages &&
> +		       (folio = readahead_folio(rac)) != NULL) {
> +			fuse_wait_on_folio_writeback(inode, folio);

Oh.  Even easier, we should hoist the whole thing to here.  Before
this loop,

		pgoff_t first = readahead_index(rac);
		pgoff_t last = first + readahead_count(rac) - 1;
		wait_event(fi->page_waitq, !fuse_range_is_writeback(inode,
				first, last));

(I'm not quite sure how we might have pending writeback still when we're
doing readahead, but fuse is a funny creature and if somebody explains
why to me, I'll probably forget again)

> +			ap->pages[i] = &folio->page;
> +			ap->descs[i].length = folio_size(folio);
> +			ap->num_pages++;

I do want to turn __readahead_batch into working on folios, but that
involves working on fuse & squashfs at the same time ... I see you
got rid of the readahead_page_batch() in btrfs recently; that'll help.
Josef Bacik Aug. 27, 2024, 10:23 p.m. UTC | #2
On Tue, Aug 27, 2024 at 10:46:57PM +0100, Matthew Wilcox wrote:
> On Tue, Aug 27, 2024 at 04:45:14PM -0400, Josef Bacik wrote:
> >  
> > +/*
> > + * Wait for page writeback in the range to be completed.  This will work for
> > + * folio_size() > PAGE_SIZE, even tho we don't currently allow that.
> > + */
> > +static void fuse_wait_on_folio_writeback(struct inode *inode,
> > +					 struct folio *folio)
> > +{
> > +	for (pgoff_t index = folio_index(folio);
> > +	     index < folio_next_index(folio); index++)
> > +		fuse_wait_on_page_writeback(inode, index);
> > +}
> 
> Would it be better to write this as:
> 
> 	struct fuse_inode *fi = get_fuse_inode(inode);
> 	pgoff_t last = folio_next_index(folio) - 1;
> 
> 	wait_event(fi->page_waitq, !fuse_range_is_writeback(inode,
> 				folio->index, last));
> 
> > @@ -1015,13 +1036,14 @@ static void fuse_readahead(struct readahead_control *rac)
> >  		if (!ia)
> >  			return;
> >  		ap = &ia->ap;
> > -		nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
> > -		for (i = 0; i < nr_pages; i++) {
> > -			fuse_wait_on_page_writeback(inode,
> > -						    readahead_index(rac) + i);
> > -			ap->descs[i].length = PAGE_SIZE;
> > +
> > +		while (nr_folios < nr_pages &&
> > +		       (folio = readahead_folio(rac)) != NULL) {
> > +			fuse_wait_on_folio_writeback(inode, folio);
> 
> Oh.  Even easier, we should hoist the whole thing to here.  Before
> this loop,
> 
> 		pgoff_t first = readahead_index(rac);
> 		pgoff_t last = first + readahead_count(rac) - 1;
> 		wait_event(fi->page_waitq, !fuse_range_is_writeback(inode,
> 				first, last));
> 
> (I'm not quite sure how we might have pending writeback still when we're
> doing readahead, but fuse is a funny creature and if somebody explains
> why to me, I'll probably forget again)
> 

Ah good suggestion, I like this better.  I didn't read carefully enough and
thought the waitqueue was on the writeback struct.  I'll rework it in the
morning and re-send once the tests run again.

> > +			ap->pages[i] = &folio->page;
> > +			ap->descs[i].length = folio_size(folio);
> > +			ap->num_pages++;
> 
> I do want to turn __readahead_batch into working on folios, but that
> involves working on fuse & squashfs at the same time ... I see you
> got rid of the readahead_page_batch() in btrfs recently; that'll help.

Do you want me to tackle that since I'm messing around in this area anyway?  My
only hesitation is we're limited to the 32 folios or whatever the pagevec count
is nowadays, and we may want to cycle through more.  But I've just finished
eating dinner and haven't actually looked at anything yet.  Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 88f872c02349..5024bc5a1da2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -483,6 +483,18 @@  static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
 	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
 }
 
+/*
+ * Wait for page writeback in the range to be completed.  This will work for
+ * folio_size() > PAGE_SIZE, even tho we don't currently allow that.
+ */
+static void fuse_wait_on_folio_writeback(struct inode *inode,
+					 struct folio *folio)
+{
+	for (pgoff_t index = folio_index(folio);
+	     index < folio_next_index(folio); index++)
+		fuse_wait_on_page_writeback(inode, index);
+}
+
 /*
  * Wait for all pending writepages on the inode to finish.
  *
@@ -997,6 +1009,8 @@  static void fuse_readahead(struct readahead_control *rac)
 	for (;;) {
 		struct fuse_io_args *ia;
 		struct fuse_args_pages *ap;
+		struct folio *folio;
+		unsigned nr_folios = 0;
 
 		if (fc->num_background >= fc->congestion_threshold &&
 		    rac->ra->async_size >= readahead_count(rac))
@@ -1006,7 +1020,14 @@  static void fuse_readahead(struct readahead_control *rac)
 			 */
 			break;
 
-		nr_pages = readahead_count(rac) - nr_pages;
+		/*
+		 * readahead_folio() updates the readahead_count(), so this will
+		 * always return the remaining pages count.  NOTE: this is in
+		 * PAGE_SIZE increments, which for now we do not support large
+		 * folios, but in the future we could end up with 1 folio
+		 * covering multiple PAGE_SIZE increments.
+		 */
+		nr_pages = readahead_count(rac);
 		if (nr_pages > max_pages)
 			nr_pages = max_pages;
 		if (nr_pages == 0)
@@ -1015,13 +1036,14 @@  static void fuse_readahead(struct readahead_control *rac)
 		if (!ia)
 			return;
 		ap = &ia->ap;
-		nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
-		for (i = 0; i < nr_pages; i++) {
-			fuse_wait_on_page_writeback(inode,
-						    readahead_index(rac) + i);
-			ap->descs[i].length = PAGE_SIZE;
+
+		while (nr_folios < nr_pages &&
+		       (folio = readahead_folio(rac)) != NULL) {
+			fuse_wait_on_folio_writeback(inode, folio);
+			ap->pages[i] = &folio->page;
+			ap->descs[i].length = folio_size(folio);
+			ap->num_pages++;
 		}
-		ap->num_pages = nr_pages;
 		fuse_send_readpages(ia, rac->file);
 	}
 }