diff mbox series

[v1,04/10] NFS: Keep the readdir pagecache cursor updated

Message ID f803021382040dba38ce8414ed1db8e400c0cc49.1611160121.git.bcodding@redhat.com (mailing list archive)
State New, archived
Headers show
Series NFS client readdir per-page validation | expand

Commit Message

Benjamin Coddington Jan. 20, 2021, 4:59 p.m. UTC
Whenever we successfully locate our dir_cookie within the pagecache, or
finish emitting entries to userspace, update the pagecache cursor.  These
updates provide marker points to validate pagecache pages in a future
patch.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfs/dir.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

Comments

Trond Myklebust Jan. 21, 2021, 8 p.m. UTC | #1
On Wed, 2021-01-20 at 11:59 -0500, Benjamin Coddington wrote:
> Whenever we successfully locate our dir_cookie within the pagecache,
> or
> finish emitting entries to userspace, update the pagecache cursor. 
> These
> updates provide marker points to validate pagecache pages in a future
> patch.

How isn't this going to end up subject to the exact same problem that
Dave Wysochanski's patchset had?

> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfs/dir.c | 29 +++++++++++++++++++++++++----
>  1 file changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 6626aff9f54d..7f6c84c8a412 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -150,6 +150,10 @@ struct nfs_cache_array {
>         struct nfs_cache_array_entry array[];
>  };
>  
> +static const int cache_entries_per_page =
> +       (PAGE_SIZE - sizeof(struct nfs_cache_array)) /
> +       sizeof(struct nfs_cache_array_entry);
> +
>  struct nfs_readdir_descriptor {
>         struct file     *file;
>         struct page     *page;
> @@ -251,6 +255,21 @@ static bool nfs_readdir_array_is_full(struct
> nfs_cache_array *array)
>         return array->page_full;
>  }
>  
> +static void nfs_readdir_set_cursor(struct nfs_readdir_descriptor
> *desc, int index)
> +{
> +       desc->pgc.entry_index = index;
> +       desc->pgc.index_cookie = desc->dir_cookie;
> +}
> +
> +static void nfs_readdir_cursor_next(struct nfs_dir_page_cursor *pgc,
> u64 cookie)
> +{
> +       pgc->index_cookie = cookie;
> +       if (++pgc->entry_index == cache_entries_per_page) {
> +               pgc->entry_index = 0;
> +               pgc->page_index++;
> +       }
> +}
> +
>  /*
>   * the caller is responsible for freeing qstr.name
>   * when called by nfs_readdir_add_to_array, the strings will be
> freed in
> @@ -424,7 +443,7 @@ static int nfs_readdir_search_for_pos(struct
> nfs_cache_array *array,
>  
>         index = (unsigned int)diff;
>         desc->dir_cookie = array->array[index].cookie;
> -       desc->pgc.entry_index = index;
> +       nfs_readdir_set_cursor(desc, index);
>         return 0;
>  out_eof:
>         desc->eof = true;
> @@ -492,7 +511,7 @@ static int nfs_readdir_search_for_cookie(struct
> nfs_cache_array *array,
>                         else
>                                 desc->ctx->pos = new_pos;
>                         desc->prev_index = new_pos;
> -                       desc->pgc.entry_index = i;
> +                       nfs_readdir_set_cursor(desc, i);
>                         return 0;
>                 }
>         }
> @@ -519,9 +538,9 @@ static int nfs_readdir_search_array(struct
> nfs_readdir_descriptor *desc)
>                 status = nfs_readdir_search_for_cookie(array, desc);
>  
>         if (status == -EAGAIN) {
> -               desc->pgc.index_cookie = array->last_cookie;
> +               desc->pgc.entry_index = array->size - 1;
> +               nfs_readdir_cursor_next(&desc->pgc, array-
> >last_cookie);
>                 desc->current_index += array->size;
> -               desc->pgc.page_index++;
>         }
>         kunmap_atomic(array);
>         return status;
> @@ -1035,6 +1054,8 @@ static void nfs_do_filldir(struct
> nfs_readdir_descriptor *desc)
>                 desc->eof = true;
>  
>         kunmap(desc->page);
> +       desc->pgc.entry_index = i-1;
> +       nfs_readdir_cursor_next(&desc->pgc, desc->dir_cookie);
>         dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @
> cookie %llu\n",
>                         (unsigned long long)desc->dir_cookie);
>  }
Trond Myklebust Jan. 21, 2021, 8:11 p.m. UTC | #2
On Thu, 2021-01-21 at 20:00 +0000, Trond Myklebust wrote:
> On Wed, 2021-01-20 at 11:59 -0500, Benjamin Coddington wrote:
> > Whenever we successfully locate our dir_cookie within the
> > pagecache,
> > or
> > finish emitting entries to userspace, update the pagecache cursor. 
> > These
> > updates provide marker points to validate pagecache pages in a
> > future
> > patch.
> 
> How isn't this going to end up subject to the exact same problem that
> Dave Wysochanski's patchset had?

IOW: how is this not also making invalid assumptions around page cache
layout stability across READDIR calls?
Benjamin Coddington Jan. 22, 2021, 12:21 a.m. UTC | #3
On 21 Jan 2021, at 15:11, Trond Myklebust wrote:

> On Thu, 2021-01-21 at 20:00 +0000, Trond Myklebust wrote:
>> On Wed, 2021-01-20 at 11:59 -0500, Benjamin Coddington wrote:
>>> Whenever we successfully locate our dir_cookie within the
>>> pagecache,
>>> or
>>> finish emitting entries to userspace, update the pagecache cursor. 
>>> These
>>> updates provide marker points to validate pagecache pages in a
>>> future
>>> patch.
>>
>> How isn't this going to end up subject to the exact same problem that
>> Dave Wysochanski's patchset had?
>
> IOW: how is this not also making invalid assumptions around page cache
> layout stability across READDIR calls?

IIRC, Dave's approach was to store the index along with dir_cookie in
order to skip having to re-fill the cache to resume a listing.  That
approach assumed that the index still referred to page data aligned
with the current reader's dir_cookie.  But in between calls to
nfs_readdir() it would be possible for another reader to fill the cache 
with
a different alignment due to directory changes.  In which case the index 
is not
going to point to the next entry, we'll either skip entries or repeat 
them.

With this approach, we don't assume this is the case.  For every page 
with
data, the alignment of the entries is verified to be in the same 
position
as that reader's last pass through.  If not, the page data is discarded 
and
refreshed with a new READDIR op - just like an uncached_readdir path, 
but we
still fill the pagecache.  Every READDIR also updates the change_attr, 
so
even if there are cached pages beyond our current location that match 
our
alignment, we detect the case where those pages are actually invalid due 
to
changes on the server, and we re-fill them.

Another way to think about this is that instead of trying to cache the 
complete
representation of the directory aligned to the first entry in the 
pagecache,
we're instead just caching the results of any READDIR at convenient 
offsets
in the pagecache for other readers that might follow.  The READDIR 
results
are only usable if they match the current version of the directory and 
their
alignment is correct.

If you were to lseek to nearly the end of a directory, the first call to
nfs_readdir() will end up with results of a READDIR hitting the cache 
and
filling page->index 0.  That's ok because any other reader coming along 
with
a different alignment will discard that data and refresh it. We are 
going to
create a performance penalty for two readers that want to regularly fill
entries at different alignments, but I think that case is probably 
fairly
rare.  I am making a guess, but I think the most common usage of readdir 
is
by readers that want to traverse the entire directory in order.

So for that case all the readers benefit from the work of other 
processes,
the cache can be filled in parallel, and readers at the beginning don't
prevent readers at the end from filling in entries.  We no longer have 
to
worry whether we have enough memory to list a directory, or play games 
with
timeouts.

Ben
diff mbox series

Patch

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6626aff9f54d..7f6c84c8a412 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -150,6 +150,10 @@  struct nfs_cache_array {
 	struct nfs_cache_array_entry array[];
 };
 
+static const int cache_entries_per_page =
+	(PAGE_SIZE - sizeof(struct nfs_cache_array)) /
+	sizeof(struct nfs_cache_array_entry);
+
 struct nfs_readdir_descriptor {
 	struct file	*file;
 	struct page	*page;
@@ -251,6 +255,21 @@  static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
 	return array->page_full;
 }
 
+static void nfs_readdir_set_cursor(struct nfs_readdir_descriptor *desc, int index)
+{
+	desc->pgc.entry_index = index;
+	desc->pgc.index_cookie = desc->dir_cookie;
+}
+
+static void nfs_readdir_cursor_next(struct nfs_dir_page_cursor *pgc, u64 cookie)
+{
+	pgc->index_cookie = cookie;
+	if (++pgc->entry_index == cache_entries_per_page) {
+		pgc->entry_index = 0;
+		pgc->page_index++;
+	}
+}
+
 /*
  * the caller is responsible for freeing qstr.name
  * when called by nfs_readdir_add_to_array, the strings will be freed in
@@ -424,7 +443,7 @@  static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
 
 	index = (unsigned int)diff;
 	desc->dir_cookie = array->array[index].cookie;
-	desc->pgc.entry_index = index;
+	nfs_readdir_set_cursor(desc, index);
 	return 0;
 out_eof:
 	desc->eof = true;
@@ -492,7 +511,7 @@  static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
 			else
 				desc->ctx->pos = new_pos;
 			desc->prev_index = new_pos;
-			desc->pgc.entry_index = i;
+			nfs_readdir_set_cursor(desc, i);
 			return 0;
 		}
 	}
@@ -519,9 +538,9 @@  static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
 		status = nfs_readdir_search_for_cookie(array, desc);
 
 	if (status == -EAGAIN) {
-		desc->pgc.index_cookie = array->last_cookie;
+		desc->pgc.entry_index = array->size - 1;
+		nfs_readdir_cursor_next(&desc->pgc, array->last_cookie);
 		desc->current_index += array->size;
-		desc->pgc.page_index++;
 	}
 	kunmap_atomic(array);
 	return status;
@@ -1035,6 +1054,8 @@  static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 		desc->eof = true;
 
 	kunmap(desc->page);
+	desc->pgc.entry_index = i-1;
+	nfs_readdir_cursor_next(&desc->pgc, desc->dir_cookie);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
 			(unsigned long long)desc->dir_cookie);
 }