Message ID | 934FC075-4393-42AD-92A3-3BC3BE580699@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> On Dec 7, 2016, at 17:34, Benjamin Coddington <bcodding@redhat.com> wrote: > > From: "Benjamin Coddington" <bcodding@redhat.com> > > So, I've got the following patch, and it seems to work fine for the original > workload. However, if I use ~20 procs started 2 seconds apart, I can still > sometimes get into the state where the machine takes a very long time (5 - 8 > minutes). I wonder if I am getting into a state were vmscan is reclaiming > pages while I'm trying to fill them up. So.. I'll do a bit more debugging > and re-post this if I feel like it is still the right approach. > > Adding an int to nfs_open_dir_context puts it at 60 bytes here. Probably > could have added a unsigned long flags and used bits for the duped stuff as > well, but it was uglier that way, so I left it. I like how duped carries > -1, 0, and >1 information without having flag defines cluttering everywhere. > > Ben > > 8<------------------------------------------------------------------------ > > From 5b3e747a984ec966e8c742d8f4fe4b08e1c93acd Mon Sep 17 00:00:00 2001 > Message-Id: <5b3e747a984ec966e8c742d8f4fe4b08e1c93acd.1481149380.git.bcodding@redhat.com> > From: Benjamin Coddington <bcodding@redhat.com> > Date: Wed, 7 Dec 2016 16:23:30 -0500 > Subject: [PATCH] NFS: Move readdirplus flag to directory context > > For a concurrent 'ls -l' workload, processes can stack up in nfs_readdir() > both waiting on the next page and taking turns filling the next page from > XDR, but only one of them will have desc->plus set because setting it > clears the flag on the directory. If a page is filled by a process that > doesn't have desc->plus set then the next pass through lookup() will cause > it to dump the entire page cache with nfs_force_use_readdirplus(). Then > the next readdir starts all over filling the pagecache. Forward progress > happens, but only after many steps back re-filling the pagecache. > > Fix this by moving the flag to use readdirplus to each open directory > context. > > Suggested-by: Trond Myklebust <trondmy@primarydata.com> > Signed-off-by: Benjamin Coddington <bcodding@redhat.com> > --- > fs/nfs/dir.c | 39 ++++++++++++++++++++++++--------------- > include/linux/nfs_fs.h | 1 + > 2 files changed, 25 insertions(+), 15 deletions(-) > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index 7483722162fa..818172606fed 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir > ctx->dir_cookie = 0; > ctx->dup_cookie = 0; > ctx->cred = get_rpccred(cred); > + ctx->use_readdir_plus = 0; > spin_lock(&dir->i_lock); > list_add(&ctx->list, &nfsi->open_files); > spin_unlock(&dir->i_lock); > @@ -443,17 +444,35 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) > } > > static > -bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) > +bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, > + struct nfs_open_dir_context *dir_ctx) > { > if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) > return false; > - if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) > + if (xchg(&dir_ctx->use_readdir_plus, 0)) > return true; > if (ctx->pos == 0) > return true; > return false; > } > > +static > +void nfs_set_readdirplus(struct inode *dir, int force) > +{ > + struct nfs_inode *nfsi = NFS_I(dir); > + struct nfs_open_dir_context *ctx; > + > + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && > + !list_empty(&nfsi->open_files)) { > + spin_lock(&dir->i_lock); > + list_for_each_entry(ctx, &nfsi->open_files, list) > + ctx->use_readdir_plus = 1; > + spin_unlock(&dir->i_lock); > + if (force) > + invalidate_mapping_pages(dir->i_mapping, 0, -1); > + } > +} > + > /* > * This function is called by the lookup and getattr code to request the > * use of readdirplus to accelerate any future lookups in the same > @@ -461,11 +480,7 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) > */ > void nfs_advise_use_readdirplus(struct inode *dir) > { > - struct nfs_inode *nfsi = NFS_I(dir); > - > - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && > - !list_empty(&nfsi->open_files)) > - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); > + nfs_set_readdirplus(dir, 0); > } > > /* > @@ -478,13 +493,7 @@ void nfs_advise_use_readdirplus(struct inode *dir) > */ > void nfs_force_use_readdirplus(struct inode *dir) > { > - struct nfs_inode *nfsi = NFS_I(dir); > - > - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && > - !list_empty(&nfsi->open_files)) { > - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); > - invalidate_mapping_pages(dir->i_mapping, 0, -1); > - } > + nfs_set_readdirplus(dir, 1); > } > > static > @@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) > desc->ctx = ctx; > desc->dir_cookie = &dir_ctx->dir_cookie; > desc->decode = NFS_PROTO(inode)->decode_dirent; > - desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; > + desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0; This fixes desc->plus at the beginning of the readdir() call. Perhaps we should instead check the value of ctx->use_readdir_plus in the call to nfs_readdir_xdr_filler(), and just resetting cts->use_readdir_plus at the very end of nfs_readdir()? > > if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) > res = nfs_revalidate_mapping(inode, file->f_mapping); > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h > index cb631973839a..fe06c1dd1737 100644 > --- a/include/linux/nfs_fs.h > +++ b/include/linux/nfs_fs.h > @@ -89,6 +89,7 @@ struct nfs_open_dir_context { > __u64 dir_cookie; > __u64 dup_cookie; > signed char duped; > + int use_readdir_plus; > }; > > /* > -- > 2.5.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 7 Dec 2016, at 17:41, Trond Myklebust wrote: >> On Dec 7, 2016, at 17:34, Benjamin Coddington <bcodding@redhat.com> >> wrote: >> static >> @@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, struct >> dir_context *ctx) >> desc->ctx = ctx; >> desc->dir_cookie = &dir_ctx->dir_cookie; >> desc->decode = NFS_PROTO(inode)->decode_dirent; >> - desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; >> + desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0; > > This fixes desc->plus at the beginning of the readdir() call. Perhaps > we > should instead check the value of ctx->use_readdir_plus in the call to > nfs_readdir_xdr_filler(), and just resetting cts->use_readdir_plus at > the > very end of nfs_readdir()? I don't understand the functional difference. Is this just a preference? There must be something else happening.. dcache is getting under pressure pruned maybe, that causes a miss and then the process starts over? Ben -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On Dec 7, 2016, at 17:55, Benjamin Coddington <bcodding@redhat.com> wrote: > > On 7 Dec 2016, at 17:41, Trond Myklebust wrote: > >>> On Dec 7, 2016, at 17:34, Benjamin Coddington <bcodding@redhat.com> wrote: >>> static >>> @@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) >>> desc->ctx = ctx; >>> desc->dir_cookie = &dir_ctx->dir_cookie; >>> desc->decode = NFS_PROTO(inode)->decode_dirent; >>> - desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; >>> + desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0; >> >> This fixes desc->plus at the beginning of the readdir() call. Perhaps we >> should instead check the value of ctx->use_readdir_plus in the call to >> nfs_readdir_xdr_filler(), and just resetting cts->use_readdir_plus at the >> very end of nfs_readdir()? > > I don't understand the functional difference. Is this just a preference? No. The functional difference is that we take into account the fact that a process may be in the readdir() code while a GETATTR or LOOKUP from a second process is triggering “use readdirplus” feedback. > > There must be something else happening.. dcache is getting under pressure > pruned maybe, that causes a miss and then the process starts over? > > Ben >
On 7 Dec 2016, at 17:59, Trond Myklebust wrote: >> On Dec 7, 2016, at 17:55, Benjamin Coddington <bcodding@redhat.com> >> wrote: >> >> On 7 Dec 2016, at 17:41, Trond Myklebust wrote: >> >>>> On Dec 7, 2016, at 17:34, Benjamin Coddington <bcodding@redhat.com> >>>> wrote: >>>> static >>>> @@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, >>>> struct dir_context *ctx) >>>> desc->ctx = ctx; >>>> desc->dir_cookie = &dir_ctx->dir_cookie; >>>> desc->decode = NFS_PROTO(inode)->decode_dirent; >>>> - desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; >>>> + desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0; >>> >>> This fixes desc->plus at the beginning of the readdir() call. >>> Perhaps we >>> should instead check the value of ctx->use_readdir_plus in the call >>> to >>> nfs_readdir_xdr_filler(), and just resetting cts->use_readdir_plus >>> at the >>> very end of nfs_readdir()? >> >> I don't understand the functional difference. Is this just a >> preference? > > No. The functional difference is that we take into account the fact > that a > process may be in the readdir() code while a GETATTR or LOOKUP from a > second process is triggering “use readdirplus” feedback. This should only matter if the concurrent processes have different buffer sizes or start at a different offsets -- which shouldn't happen with plain old 'ls -l'. .. or maybe I'm wrong if, hmm.. if acdirmin ran out (maybe?).. or if we mix 'ls -l' and 'ls'.. or if we have pages getting reclaimed.. OK. I'll try it. Thanks for your suggestions. Ben -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 7 Dec 2016, at 18:10, Benjamin Coddington wrote: > On 7 Dec 2016, at 17:59, Trond Myklebust wrote: > >>> On Dec 7, 2016, at 17:55, Benjamin Coddington <bcodding@redhat.com> >>> wrote: >>> >>> On 7 Dec 2016, at 17:41, Trond Myklebust wrote: >>> >>>>> On Dec 7, 2016, at 17:34, Benjamin Coddington >>>>> <bcodding@redhat.com> wrote: >>>>> static >>>>> @@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, >>>>> struct dir_context *ctx) >>>>> desc->ctx = ctx; >>>>> desc->dir_cookie = &dir_ctx->dir_cookie; >>>>> desc->decode = NFS_PROTO(inode)->decode_dirent; >>>>> - desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; >>>>> + desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0; >>>> >>>> This fixes desc->plus at the beginning of the readdir() call. >>>> Perhaps we >>>> should instead check the value of ctx->use_readdir_plus in the call >>>> to >>>> nfs_readdir_xdr_filler(), and just resetting cts->use_readdir_plus >>>> at the >>>> very end of nfs_readdir()? >>> >>> I don't understand the functional difference. Is this just a >>> preference? >> >> No. The functional difference is that we take into account the fact >> that a >> process may be in the readdir() code while a GETATTR or LOOKUP from a >> second process is triggering “use readdirplus” feedback. > > This should only matter if the concurrent processes have different > buffer > sizes or start at a different offsets -- which shouldn't happen with > plain > old 'ls -l'. > > .. or maybe I'm wrong if, hmm.. if acdirmin ran out (maybe?).. or if > we mix > 'ls -l' and 'ls'.. or if we have pages getting reclaimed.. OK. I'll > try it. This doesn't help. The issue is that anything more than a couple of processes cause contention on the directory's i_lock. The i_lock is taken x entries x processes. The inode flags and serialization worked far better. If we add another inode flag, then we can add a DOING_RDPLUS. A process entering nfs_readdir() that sees ADVISE and not DOING clears it and sets DOING and remembers to clear DOING on exit of nfs_readdir(). Any process that sees DOING uses plus. Ben -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7483722162fa..818172606fed 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->dir_cookie = 0; ctx->dup_cookie = 0; ctx->cred = get_rpccred(cred); + ctx->use_readdir_plus = 0; spin_lock(&dir->i_lock); list_add(&ctx->list, &nfsi->open_files); spin_unlock(&dir->i_lock); @@ -443,17 +444,35 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) } static -bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) +bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, + struct nfs_open_dir_context *dir_ctx) { if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) return false; - if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) + if (xchg(&dir_ctx->use_readdir_plus, 0)) return true; if (ctx->pos == 0) return true; return false; } +static +void nfs_set_readdirplus(struct inode *dir, int force) +{ + struct nfs_inode *nfsi = NFS_I(dir); + struct nfs_open_dir_context *ctx; + + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) { + spin_lock(&dir->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) + ctx->use_readdir_plus = 1; + spin_unlock(&dir->i_lock); + if (force) + invalidate_mapping_pages(dir->i_mapping, 0, -1); + } +} + /* * This function is called by the lookup and getattr code to request the * use of readdirplus to accelerate any future lookups in the same @@ -461,11 +480,7 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) */ void nfs_advise_use_readdirplus(struct inode *dir) { - struct nfs_inode *nfsi = NFS_I(dir); - - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && - !list_empty(&nfsi->open_files)) - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); + nfs_set_readdirplus(dir, 0); } /* @@ -478,13 +493,7 @@ void nfs_advise_use_readdirplus(struct inode *dir) */ void nfs_force_use_readdirplus(struct inode *dir) { - struct nfs_inode *nfsi = NFS_I(dir); - - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && - !list_empty(&nfsi->open_files)) { - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); - invalidate_mapping_pages(dir->i_mapping, 0, -1); - } + nfs_set_readdirplus(dir, 1); } static @@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->ctx = ctx; desc->dir_cookie = &dir_ctx->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; - desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; + desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0; if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cb631973839a..fe06c1dd1737 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -89,6 +89,7 @@ struct nfs_open_dir_context { __u64 dir_cookie; __u64 dup_cookie; signed char duped; + int use_readdir_plus; }; /*