[RFC] nfs: optimise readdir cache page invalidation
diff mbox series

Message ID 20200122234853.101576-1-dai.ngo@oracle.com
State New
Headers show
Series
  • [RFC] nfs: optimise readdir cache page invalidation
Related show

Commit Message

Dai Ngo Jan. 22, 2020, 11:48 p.m. UTC
When the directory is large and it's being modified by one client
while another client is doing the 'ls -l' on the same directory then
the cache page invalidation from nfs_force_use_readdirplus causes
the reading client to keep restarting READDIRPLUS from cookie 0
which causes the 'ls -l' to take a very long time to complete,
possibly never completing.

Currently when nfs_force_use_readdirplus is called to switch from
READDIR to READDIRPLUS, it invalidates all the cached pages of the
directory. This cache page invalidation causes the next nfs_readdir
to re-read the directory content from cookie 0.

This patch is to optimise the cache invalidation in
nfs_force_use_readdirplus by only truncating the cached pages from
last page index accessed to the end the file. It also marks the
inode to delay invalidating all the cached page of the directory
until the next initial nfs_readdir of the next 'ls' instance.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>

---
 fs/nfs/dir.c           | 14 +++++++++++++-
 include/linux/nfs_fs.h |  3 +++
 2 files changed, 16 insertions(+), 1 deletion(-)

Comments

Trond Myklebust Jan. 23, 2020, 12:10 a.m. UTC | #1
On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> When the directory is large and it's being modified by one client
> while another client is doing the 'ls -l' on the same directory then
> the cache page invalidation from nfs_force_use_readdirplus causes
> the reading client to keep restarting READDIRPLUS from cookie 0
> which causes the 'ls -l' to take a very long time to complete,
> possibly never completing.
> 
> Currently when nfs_force_use_readdirplus is called to switch from
> READDIR to READDIRPLUS, it invalidates all the cached pages of the
> directory. This cache page invalidation causes the next nfs_readdir
> to re-read the directory content from cookie 0.
> 
> This patch is to optimise the cache invalidation in
> nfs_force_use_readdirplus by only truncating the cached pages from
> last page index accessed to the end the file. It also marks the
> inode to delay invalidating all the cached page of the directory
> until the next initial nfs_readdir of the next 'ls' instance.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> 
> ---
>  fs/nfs/dir.c           | 14 +++++++++++++-
>  include/linux/nfs_fs.h |  3 +++
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e180033e35cf..3fbf2e41d523 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> *dir)
>  	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
>  	    !list_empty(&nfsi->open_files)) {
>  		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> -		invalidate_mapping_pages(dir->i_mapping, 0, -1);
> +		nfs_zap_mapping(dir, dir->i_mapping);
> +		if (NFS_PROTO(dir)->version == 3)
> +			invalidate_mapping_pages(dir->i_mapping,
> +				nfsi->page_index + 1, -1);

Let's do this for NFSv4 as well. The motivation is the same: to allow
us to replace a series of GETATTR calls to one or more READDIRPLUS
calls.

>  	}
>  }
>  
> @@ -709,6 +712,9 @@ struct page
> *get_cache_page(nfs_readdir_descriptor_t *desc)
>  int find_cache_page(nfs_readdir_descriptor_t *desc)
>  {
>  	int res;
> +	struct inode *inode;
> +	struct nfs_inode *nfsi;
> +	struct dentry *dentry;
>  
>  	desc->page = get_cache_page(desc);
>  	if (IS_ERR(desc->page))
> @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> *desc)
>  	res = nfs_readdir_search_array(desc);
>  	if (res != 0)
>  		cache_page_release(desc);
> +
> +	dentry = file_dentry(desc->file);
> +	inode = d_inode(dentry);
> +	nfsi = NFS_I(inode);
> +	nfsi->page_index = desc->page_index;
> +
>  	return res;
>  }
>  
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index c06b1fd130f3..a5f8f03ecd59 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -168,6 +168,9 @@ struct nfs_inode {
>  	struct rw_semaphore	rmdir_sem;
>  	struct mutex		commit_mutex;
>  
> +	/* track last access to cached pages */
> +	unsigned long		page_index;
> +
>  #if IS_ENABLED(CONFIG_NFS_V4)
>  	struct nfs4_cached_acl	*nfs4_acl;
>          /* NFSv4 state */

Otherwise, the patch looks good to me.
Trond Myklebust Jan. 23, 2020, 6:35 p.m. UTC | #2
On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> When the directory is large and it's being modified by one client
> while another client is doing the 'ls -l' on the same directory then
> the cache page invalidation from nfs_force_use_readdirplus causes
> the reading client to keep restarting READDIRPLUS from cookie 0
> which causes the 'ls -l' to take a very long time to complete,
> possibly never completing.
> 
> Currently when nfs_force_use_readdirplus is called to switch from
> READDIR to READDIRPLUS, it invalidates all the cached pages of the
> directory. This cache page invalidation causes the next nfs_readdir
> to re-read the directory content from cookie 0.
> 
> This patch is to optimise the cache invalidation in
> nfs_force_use_readdirplus by only truncating the cached pages from
> last page index accessed to the end the file. It also marks the
> inode to delay invalidating all the cached page of the directory
> until the next initial nfs_readdir of the next 'ls' instance.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>

Reviewed-by: Trond Myklebust <trond.myklebust@hammerspace.com>

> 
> ---
>  fs/nfs/dir.c           | 14 +++++++++++++-
>  include/linux/nfs_fs.h |  3 +++
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e180033e35cf..3fbf2e41d523 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> *dir)
>  	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
>  	    !list_empty(&nfsi->open_files)) {
>  		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> -		invalidate_mapping_pages(dir->i_mapping, 0, -1);
> +		nfs_zap_mapping(dir, dir->i_mapping);
> +		if (NFS_PROTO(dir)->version == 3)
> +			invalidate_mapping_pages(dir->i_mapping,
> +				nfsi->page_index + 1, -1);
>  	}
>  }
>  
> @@ -709,6 +712,9 @@ struct page
> *get_cache_page(nfs_readdir_descriptor_t *desc)
>  int find_cache_page(nfs_readdir_descriptor_t *desc)
>  {
>  	int res;
> +	struct inode *inode;
> +	struct nfs_inode *nfsi;
> +	struct dentry *dentry;
>  
>  	desc->page = get_cache_page(desc);
>  	if (IS_ERR(desc->page))
> @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> *desc)
>  	res = nfs_readdir_search_array(desc);
>  	if (res != 0)
>  		cache_page_release(desc);
> +
> +	dentry = file_dentry(desc->file);
> +	inode = d_inode(dentry);
> +	nfsi = NFS_I(inode);
> +	nfsi->page_index = desc->page_index;
> +
>  	return res;
>  }
>  
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index c06b1fd130f3..a5f8f03ecd59 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -168,6 +168,9 @@ struct nfs_inode {
>  	struct rw_semaphore	rmdir_sem;
>  	struct mutex		commit_mutex;
>  
> +	/* track last access to cached pages */
> +	unsigned long		page_index;
> +
>  #if IS_ENABLED(CONFIG_NFS_V4)
>  	struct nfs4_cached_acl	*nfs4_acl;
>          /* NFSv4 state */
Trond Myklebust Jan. 23, 2020, 6:37 p.m. UTC | #3
On Thu, 2020-01-23 at 10:35 -0800, Trond Myklebust wrote:
> On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> > When the directory is large and it's being modified by one client
> > while another client is doing the 'ls -l' on the same directory
> > then
> > the cache page invalidation from nfs_force_use_readdirplus causes
> > the reading client to keep restarting READDIRPLUS from cookie 0
> > which causes the 'ls -l' to take a very long time to complete,
> > possibly never completing.
> > 
> > Currently when nfs_force_use_readdirplus is called to switch from
> > READDIR to READDIRPLUS, it invalidates all the cached pages of the
> > directory. This cache page invalidation causes the next nfs_readdir
> > to re-read the directory content from cookie 0.
> > 
> > This patch is to optimise the cache invalidation in
> > nfs_force_use_readdirplus by only truncating the cached pages from
> > last page index accessed to the end the file. It also marks the
> > inode to delay invalidating all the cached page of the directory
> > until the next initial nfs_readdir of the next 'ls' instance.
> > 
> > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> 
> Reviewed-by: Trond Myklebust <trond.myklebust@hammerspace.com>

Sorry I intended this for the PATCHv2... I've looked through that
version and it looks OK to me.

Anna, please apply the later v2 instead of this one...

> 
> > ---
> >  fs/nfs/dir.c           | 14 +++++++++++++-
> >  include/linux/nfs_fs.h |  3 +++
> >  2 files changed, 16 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index e180033e35cf..3fbf2e41d523 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> > *dir)
> >  	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> >  	    !list_empty(&nfsi->open_files)) {
> >  		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> > -		invalidate_mapping_pages(dir->i_mapping, 0, -1);
> > +		nfs_zap_mapping(dir, dir->i_mapping);
> > +		if (NFS_PROTO(dir)->version == 3)
> > +			invalidate_mapping_pages(dir->i_mapping,
> > +				nfsi->page_index + 1, -1);
> >  	}
> >  }
> >  
> > @@ -709,6 +712,9 @@ struct page
> > *get_cache_page(nfs_readdir_descriptor_t *desc)
> >  int find_cache_page(nfs_readdir_descriptor_t *desc)
> >  {
> >  	int res;
> > +	struct inode *inode;
> > +	struct nfs_inode *nfsi;
> > +	struct dentry *dentry;
> >  
> >  	desc->page = get_cache_page(desc);
> >  	if (IS_ERR(desc->page))
> > @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> > *desc)
> >  	res = nfs_readdir_search_array(desc);
> >  	if (res != 0)
> >  		cache_page_release(desc);
> > +
> > +	dentry = file_dentry(desc->file);
> > +	inode = d_inode(dentry);
> > +	nfsi = NFS_I(inode);
> > +	nfsi->page_index = desc->page_index;
> > +
> >  	return res;
> >  }
> >  
> > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> > index c06b1fd130f3..a5f8f03ecd59 100644
> > --- a/include/linux/nfs_fs.h
> > +++ b/include/linux/nfs_fs.h
> > @@ -168,6 +168,9 @@ struct nfs_inode {
> >  	struct rw_semaphore	rmdir_sem;
> >  	struct mutex		commit_mutex;
> >  
> > +	/* track last access to cached pages */
> > +	unsigned long		page_index;
> > +
> >  #if IS_ENABLED(CONFIG_NFS_V4)
> >  	struct nfs4_cached_acl	*nfs4_acl;
> >          /* NFSv4 state */
Schumaker, Anna Jan. 23, 2020, 6:55 p.m. UTC | #4
On Thu, 2020-01-23 at 18:37 +0000, Trond Myklebust wrote:
> NetApp Security WARNING: This is an external email. Do not click links or open
> attachments unless you recognize the sender and know the content is safe.
> 
> 
> 
> 
> On Thu, 2020-01-23 at 10:35 -0800, Trond Myklebust wrote:
> > On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> > > When the directory is large and it's being modified by one client
> > > while another client is doing the 'ls -l' on the same directory
> > > then
> > > the cache page invalidation from nfs_force_use_readdirplus causes
> > > the reading client to keep restarting READDIRPLUS from cookie 0
> > > which causes the 'ls -l' to take a very long time to complete,
> > > possibly never completing.
> > > 
> > > Currently when nfs_force_use_readdirplus is called to switch from
> > > READDIR to READDIRPLUS, it invalidates all the cached pages of the
> > > directory. This cache page invalidation causes the next nfs_readdir
> > > to re-read the directory content from cookie 0.
> > > 
> > > This patch is to optimise the cache invalidation in
> > > nfs_force_use_readdirplus by only truncating the cached pages from
> > > last page index accessed to the end the file. It also marks the
> > > inode to delay invalidating all the cached page of the directory
> > > until the next initial nfs_readdir of the next 'ls' instance.
> > > 
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > 
> > Reviewed-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> Sorry I intended this for the PATCHv2... I've looked through that
> version and it looks OK to me.
> 
> Anna, please apply the later v2 instead of this one...

I applied the v2 about 5 minutes before you sent this. I'll make sure your
Reviewed-by gets added to it, too :) 

Anna
> 
> > > ---
> > >  fs/nfs/dir.c           | 14 +++++++++++++-
> > >  include/linux/nfs_fs.h |  3 +++
> > >  2 files changed, 16 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > > index e180033e35cf..3fbf2e41d523 100644
> > > --- a/fs/nfs/dir.c
> > > +++ b/fs/nfs/dir.c
> > > @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> > > *dir)
> > >     if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> > >         !list_empty(&nfsi->open_files)) {
> > >             set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> > > -           invalidate_mapping_pages(dir->i_mapping, 0, -1);
> > > +           nfs_zap_mapping(dir, dir->i_mapping);
> > > +           if (NFS_PROTO(dir)->version == 3)
> > > +                   invalidate_mapping_pages(dir->i_mapping,
> > > +                           nfsi->page_index + 1, -1);
> > >     }
> > >  }
> > > 
> > > @@ -709,6 +712,9 @@ struct page
> > > *get_cache_page(nfs_readdir_descriptor_t *desc)
> > >  int find_cache_page(nfs_readdir_descriptor_t *desc)
> > >  {
> > >     int res;
> > > +   struct inode *inode;
> > > +   struct nfs_inode *nfsi;
> > > +   struct dentry *dentry;
> > > 
> > >     desc->page = get_cache_page(desc);
> > >     if (IS_ERR(desc->page))
> > > @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> > > *desc)
> > >     res = nfs_readdir_search_array(desc);
> > >     if (res != 0)
> > >             cache_page_release(desc);
> > > +
> > > +   dentry = file_dentry(desc->file);
> > > +   inode = d_inode(dentry);
> > > +   nfsi = NFS_I(inode);
> > > +   nfsi->page_index = desc->page_index;
> > > +
> > >     return res;
> > >  }
> > > 
> > > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> > > index c06b1fd130f3..a5f8f03ecd59 100644
> > > --- a/include/linux/nfs_fs.h
> > > +++ b/include/linux/nfs_fs.h
> > > @@ -168,6 +168,9 @@ struct nfs_inode {
> > >     struct rw_semaphore     rmdir_sem;
> > >     struct mutex            commit_mutex;
> > > 
> > > +   /* track last access to cached pages */
> > > +   unsigned long           page_index;
> > > +
> > >  #if IS_ENABLED(CONFIG_NFS_V4)
> > >     struct nfs4_cached_acl  *nfs4_acl;
> > >          /* NFSv4 state */
> --
> Trond Myklebust
> CTO, Hammerspace Inc
> 4300 El Camino Real, Suite 105
> Los Altos, CA 94022
> www.hammer.space
> 
>

Patch
diff mbox series

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e180033e35cf..3fbf2e41d523 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -437,7 +437,10 @@  void nfs_force_use_readdirplus(struct inode *dir)
 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
 	    !list_empty(&nfsi->open_files)) {
 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
-		invalidate_mapping_pages(dir->i_mapping, 0, -1);
+		nfs_zap_mapping(dir, dir->i_mapping);
+		if (NFS_PROTO(dir)->version == 3)
+			invalidate_mapping_pages(dir->i_mapping,
+				nfsi->page_index + 1, -1);
 	}
 }
 
@@ -709,6 +712,9 @@  struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 int find_cache_page(nfs_readdir_descriptor_t *desc)
 {
 	int res;
+	struct inode *inode;
+	struct nfs_inode *nfsi;
+	struct dentry *dentry;
 
 	desc->page = get_cache_page(desc);
 	if (IS_ERR(desc->page))
@@ -717,6 +723,12 @@  int find_cache_page(nfs_readdir_descriptor_t *desc)
 	res = nfs_readdir_search_array(desc);
 	if (res != 0)
 		cache_page_release(desc);
+
+	dentry = file_dentry(desc->file);
+	inode = d_inode(dentry);
+	nfsi = NFS_I(inode);
+	nfsi->page_index = desc->page_index;
+
 	return res;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c06b1fd130f3..a5f8f03ecd59 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -168,6 +168,9 @@  struct nfs_inode {
 	struct rw_semaphore	rmdir_sem;
 	struct mutex		commit_mutex;
 
+	/* track last access to cached pages */
+	unsigned long		page_index;
+
 #if IS_ENABLED(CONFIG_NFS_V4)
 	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */