diff mbox

[1/2] NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE

Message ID 1487287688.122266.2.camel@primarydata.com (mailing list archive)
State New, archived
Headers show

Commit Message

Trond Myklebust Feb. 16, 2017, 11:28 p.m. UTC
On Thu, 2017-02-16 at 17:14 -0500, Olga Kornievskaia wrote:
> On Thu, Feb 16, 2017 at 4:45 PM, Trond Myklebust
> <trondmy@primarydata.com> wrote:
> > 
> > Olga, all your test is doing is showing that we hit the race more
> > often. Fair enough, I’ll ask Anna to revert the patch. However
> > reverting the patch won’t prevent the server from returning
> > NFS4ERR_BAD_STATEID in any cases where the calls to
> > nfs4_set_rw_stateid() happen before state recovery. This is why we
> > have the loop in nfs42_proc_copy().
> 
> I thought that if retry of the operation waits for the recovery to
> complete then nfs4_set_rw_stateid() will choose the correct stateid,
> is that not correct?
> 
> If that's not correct, then we somehow need to separate the case when
> BAD_STATEID should indeed mark the locks lost vs this case where the
> code erroneously used the bad stateid (oops) and it should really
> ignore this error. This really doesn't sound very plausible to
> accomplish.

Does this patch fix the problem?

8<-------------------------------------------------------------
From bbae95e8f97cad6a91ca8caa50b61cae789632f9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 16 Feb 2017 18:14:38 -0500
Subject: [PATCH] NFSv4: Fix reboot recovery in copy offload

Copy offload code needs to be hooked into the code for handling
NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field
in struct nfs4_exception.

Reported-by: Olga Kornievskaia <aglo@umich.edu>
Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42proc.c | 57 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

-- 
2.9.3

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com

Comments

Olga Kornievskaia Feb. 17, 2017, 2:46 p.m. UTC | #1
On Thu, Feb 16, 2017 at 6:28 PM, Trond Myklebust
<trondmy@primarydata.com> wrote:
> On Thu, 2017-02-16 at 17:14 -0500, Olga Kornievskaia wrote:
>> On Thu, Feb 16, 2017 at 4:45 PM, Trond Myklebust
>> <trondmy@primarydata.com> wrote:
>> >
>> > Olga, all your test is doing is showing that we hit the race more
>> > often. Fair enough, I’ll ask Anna to revert the patch. However
>> > reverting the patch won’t prevent the server from returning
>> > NFS4ERR_BAD_STATEID in any cases where the calls to
>> > nfs4_set_rw_stateid() happen before state recovery. This is why we
>> > have the loop in nfs42_proc_copy().
>>
>> I thought that if retry of the operation waits for the recovery to
>> complete then nfs4_set_rw_stateid() will choose the correct stateid,
>> is that not correct?
>>
>> If that's not correct, then we somehow need to separate the case when
>> BAD_STATEID should indeed mark the locks lost vs this case where the
>> code erroneously used the bad stateid (oops) and it should really
>> ignore this error. This really doesn't sound very plausible to
>> accomplish.
>
> Does this patch fix the problem?
>
> 8<-------------------------------------------------------------
> From bbae95e8f97cad6a91ca8caa50b61cae789632f9 Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <trond.myklebust@primarydata.com>
> Date: Thu, 16 Feb 2017 18:14:38 -0500
> Subject: [PATCH] NFSv4: Fix reboot recovery in copy offload
>
> Copy offload code needs to be hooked into the code for handling
> NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field
> in struct nfs4_exception.
>
> Reported-by: Olga Kornievskaia <aglo@umich.edu>
> Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation")
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/nfs42proc.c | 57 +++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 33 insertions(+), 24 deletions(-)
>
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index d12ff9385f49..baf1fe2dc296 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -128,20 +128,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
>         return err;
>  }
>
> -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
> +static ssize_t _nfs42_proc_copy(struct file *src,
>                                 struct nfs_lock_context *src_lock,
> -                               struct file *dst, loff_t pos_dst,
> +                               struct file *dst,
>                                 struct nfs_lock_context *dst_lock,
> -                               size_t count)
> +                               struct nfs42_copy_args *args,
> +                               struct nfs42_copy_res *res)
>  {
> -       struct nfs42_copy_args args = {
> -               .src_fh         = NFS_FH(file_inode(src)),
> -               .src_pos        = pos_src,
> -               .dst_fh         = NFS_FH(file_inode(dst)),
> -               .dst_pos        = pos_dst,
> -               .count          = count,
> -       };
> -       struct nfs42_copy_res res;
>         struct rpc_message msg = {
>                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
>                 .rpc_argp = &args,
> @@ -149,9 +142,12 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
>         };
>         struct inode *dst_inode = file_inode(dst);
>         struct nfs_server *server = NFS_SERVER(dst_inode);
> +       loff_t pos_src = args->src_pos;
> +       loff_t pos_dst = args->dst_pos;
> +       size_t count = args->count;
>         int status;
>
> -       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
> +       status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
>                                      src_lock, FMODE_READ);
>         if (status)
>                 return status;
> @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
>         if (status)
>                 return status;
>
> -       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
> +       status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
>                                      dst_lock, FMODE_WRITE);
>         if (status)
>                 return status;
> @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
>                 return status;
>
>         status = nfs4_call_sync(server->client, server, &msg,
> -                               &args.seq_args, &res.seq_res, 0);
> +                               &args->seq_args, &res->seq_res, 0);
>         if (status == -ENOTSUPP)
>                 server->caps &= ~NFS_CAP_COPY;
>         if (status)
>                 return status;
>
> -       if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
> -               status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
> +       if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
> +               status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
>                 if (status)
>                         return status;
>         }
>
>         truncate_pagecache_range(dst_inode, pos_dst,
> -                                pos_dst + res.write_res.count);
> +                                pos_dst + res->write_res.count);
>
> -       return res.write_res.count;
> +       return res->write_res.count;
>  }
>
>  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
> @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
>         struct nfs_server *server = NFS_SERVER(file_inode(dst));
>         struct nfs_lock_context *src_lock;
>         struct nfs_lock_context *dst_lock;
> -       struct nfs4_exception src_exception = { };
> -       struct nfs4_exception dst_exception = { };
> +       struct nfs42_copy_args args = {
> +               .src_fh         = NFS_FH(file_inode(src)),
> +               .src_pos        = pos_src,
> +               .dst_fh         = NFS_FH(file_inode(dst)),
> +               .dst_pos        = pos_dst,
> +               .count          = count,
> +       };
> +       struct nfs42_copy_res res;
> +       struct nfs4_exception src_exception = {
> +               .inode          = file_inode(src),
> +               .stateid        = &args.src_stateid,
> +       };
> +       struct nfs4_exception dst_exception = {
> +               .inode          = file_inode(dst),
> +               .stateid        = &args.dst_stateid,
> +       };
>         ssize_t err, err2;
>
>         if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
> @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
>         if (IS_ERR(src_lock))
>                 return PTR_ERR(src_lock);
>
> -       src_exception.inode = file_inode(src);
>         src_exception.state = src_lock->open_context->state;
>
>         dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
> @@ -216,13 +225,13 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
>                 goto out_put_src_lock;
>         }
>
> -       dst_exception.inode = file_inode(dst);
>         dst_exception.state = dst_lock->open_context->state;
>
>         do {
>                 inode_lock(file_inode(dst));
> -               err = _nfs42_proc_copy(src, pos_src, src_lock,
> -                                      dst, pos_dst, dst_lock, count);
> +               err = _nfs42_proc_copy(src, src_lock,
> +                               dst, dst_lock,
> +                               &args, &res);
>                 inode_unlock(file_inode(dst));
>
>                 if (err == -ENOTSUPP) {
> --
> 2.9.3

I wish it did but no it does not.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trond Myklebust Feb. 17, 2017, 2:58 p.m. UTC | #2
On Fri, 2017-02-17 at 09:46 -0500, Olga Kornievskaia wrote:
> On Thu, Feb 16, 2017 at 6:28 PM, Trond Myklebust

> <trondmy@primarydata.com> wrote:

> > On Thu, 2017-02-16 at 17:14 -0500, Olga Kornievskaia wrote:

> > > On Thu, Feb 16, 2017 at 4:45 PM, Trond Myklebust

> > > <trondmy@primarydata.com> wrote:

> > > > 

> > > > Olga, all your test is doing is showing that we hit the race

> > > > more

> > > > often. Fair enough, I’ll ask Anna to revert the patch. However

> > > > reverting the patch won’t prevent the server from returning

> > > > NFS4ERR_BAD_STATEID in any cases where the calls to

> > > > nfs4_set_rw_stateid() happen before state recovery. This is why

> > > > we

> > > > have the loop in nfs42_proc_copy().

> > > 

> > > I thought that if retry of the operation waits for the recovery

> > > to

> > > complete then nfs4_set_rw_stateid() will choose the correct

> > > stateid,

> > > is that not correct?

> > > 

> > > If that's not correct, then we somehow need to separate the case

> > > when

> > > BAD_STATEID should indeed mark the locks lost vs this case where

> > > the

> > > code erroneously used the bad stateid (oops) and it should really

> > > ignore this error. This really doesn't sound very plausible to

> > > accomplish.

> > 

> > Does this patch fix the problem?

> > 

> > 8<-------------------------------------------------------------

> > From bbae95e8f97cad6a91ca8caa50b61cae789632f9 Mon Sep 17 00:00:00

> > 2001

> > From: Trond Myklebust <trond.myklebust@primarydata.com>

> > Date: Thu, 16 Feb 2017 18:14:38 -0500

> > Subject: [PATCH] NFSv4: Fix reboot recovery in copy offload

> > 

> > Copy offload code needs to be hooked into the code for handling

> > NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field

> > in struct nfs4_exception.

> > 

> > Reported-by: Olga Kornievskaia <aglo@umich.edu>

> > Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation")

> > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

> > ---

> >  fs/nfs/nfs42proc.c | 57 +++++++++++++++++++++++++++++++-----------

> > ------------

> >  1 file changed, 33 insertions(+), 24 deletions(-)

> > 

> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c

> > index d12ff9385f49..baf1fe2dc296 100644

> > --- a/fs/nfs/nfs42proc.c

> > +++ b/fs/nfs/nfs42proc.c

> > @@ -128,20 +128,13 @@ int nfs42_proc_deallocate(struct file *filep,

> > loff_t offset, loff_t len)

> >         return err;

> >  }

> > 

> > -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,

> > +static ssize_t _nfs42_proc_copy(struct file *src,

> >                                 struct nfs_lock_context *src_lock,

> > -                               struct file *dst, loff_t pos_dst,

> > +                               struct file *dst,

> >                                 struct nfs_lock_context *dst_lock,

> > -                               size_t count)

> > +                               struct nfs42_copy_args *args,

> > +                               struct nfs42_copy_res *res)

> >  {

> > -       struct nfs42_copy_args args = {

> > -               .src_fh         = NFS_FH(file_inode(src)),

> > -               .src_pos        = pos_src,

> > -               .dst_fh         = NFS_FH(file_inode(dst)),

> > -               .dst_pos        = pos_dst,

> > -               .count          = count,

> > -       };

> > -       struct nfs42_copy_res res;

> >         struct rpc_message msg = {

> >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],

> >                 .rpc_argp = &args,

> > @@ -149,9 +142,12 @@ static ssize_t _nfs42_proc_copy(struct file

> > *src, loff_t pos_src,

> >         };

> >         struct inode *dst_inode = file_inode(dst);

> >         struct nfs_server *server = NFS_SERVER(dst_inode);

> > +       loff_t pos_src = args->src_pos;

> > +       loff_t pos_dst = args->dst_pos;

> > +       size_t count = args->count;

> >         int status;

> > 

> > -       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock-

> > >open_context,

> > +       status = nfs4_set_rw_stateid(&args->src_stateid, src_lock-

> > >open_context,

> >                                      src_lock, FMODE_READ);

> >         if (status)

> >                 return status;

> > @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file

> > *src, loff_t pos_src,

> >         if (status)

> >                 return status;

> > 

> > -       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock-

> > >open_context,

> > +       status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock-

> > >open_context,

> >                                      dst_lock, FMODE_WRITE);

> >         if (status)

> >                 return status;

> > @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file

> > *src, loff_t pos_src,

> >                 return status;

> > 

> >         status = nfs4_call_sync(server->client, server, &msg,

> > -                               &args.seq_args, &res.seq_res, 0);

> > +                               &args->seq_args, &res->seq_res, 0);

> >         if (status == -ENOTSUPP)

> >                 server->caps &= ~NFS_CAP_COPY;

> >         if (status)

> >                 return status;

> > 

> > -       if (res.write_res.verifier.committed != NFS_FILE_SYNC) {

> > -               status = nfs_commit_file(dst,

> > &res.write_res.verifier.verifier);

> > +       if (res->write_res.verifier.committed != NFS_FILE_SYNC) {

> > +               status = nfs_commit_file(dst, &res-

> > >write_res.verifier.verifier);

> >                 if (status)

> >                         return status;

> >         }

> > 

> >         truncate_pagecache_range(dst_inode, pos_dst,

> > -                                pos_dst + res.write_res.count);

> > +                                pos_dst + res->write_res.count);

> > 

> > -       return res.write_res.count;

> > +       return res->write_res.count;

> >  }

> > 

> >  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,

> > @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src,

> > loff_t pos_src,

> >         struct nfs_server *server = NFS_SERVER(file_inode(dst));

> >         struct nfs_lock_context *src_lock;

> >         struct nfs_lock_context *dst_lock;

> > -       struct nfs4_exception src_exception = { };

> > -       struct nfs4_exception dst_exception = { };

> > +       struct nfs42_copy_args args = {

> > +               .src_fh         = NFS_FH(file_inode(src)),

> > +               .src_pos        = pos_src,

> > +               .dst_fh         = NFS_FH(file_inode(dst)),

> > +               .dst_pos        = pos_dst,

> > +               .count          = count,

> > +       };

> > +       struct nfs42_copy_res res;

> > +       struct nfs4_exception src_exception = {

> > +               .inode          = file_inode(src),

> > +               .stateid        = &args.src_stateid,

> > +       };

> > +       struct nfs4_exception dst_exception = {

> > +               .inode          = file_inode(dst),

> > +               .stateid        = &args.dst_stateid,

> > +       };

> >         ssize_t err, err2;

> > 

> >         if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))

> > @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src,

> > loff_t pos_src,

> >         if (IS_ERR(src_lock))

> >                 return PTR_ERR(src_lock);

> > 

> > -       src_exception.inode = file_inode(src);

> >         src_exception.state = src_lock->open_context->state;

> > 

> >         dst_lock =

> > nfs_get_lock_context(nfs_file_open_context(dst));

> > @@ -216,13 +225,13 @@ ssize_t nfs42_proc_copy(struct file *src,

> > loff_t pos_src,

> >                 goto out_put_src_lock;

> >         }

> > 

> > -       dst_exception.inode = file_inode(dst);

> >         dst_exception.state = dst_lock->open_context->state;

> > 

> >         do {

> >                 inode_lock(file_inode(dst));

> > -               err = _nfs42_proc_copy(src, pos_src, src_lock,

> > -                                      dst, pos_dst, dst_lock,

> > count);

> > +               err = _nfs42_proc_copy(src, src_lock,

> > +                               dst, dst_lock,

> > +                               &args, &res);

> >                 inode_unlock(file_inode(dst));

> > 

> >                 if (err == -ENOTSUPP) {

> > --

> > 2.9.3

> 

> I wish it did but no it does not.

> 


So what is still happening? With this patch, the error handler should
be able to distinguish between a stateid that is up to date and one
that isn't.

There might, however, still be a problem because we have 2 stateids,
meaning that one could be stale (and generating NFS4ERR_BAD_STATEID)
and the other one not. We might have to special case that, and do the
comparisons inside nfs42_proc_copy instead of using the generic code in
nfs4_handle_exception().

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
Olga Kornievskaia Feb. 17, 2017, 3:07 p.m. UTC | #3
On Fri, Feb 17, 2017 at 9:58 AM, Trond Myklebust
<trondmy@primarydata.com> wrote:
> On Fri, 2017-02-17 at 09:46 -0500, Olga Kornievskaia wrote:
>> On Thu, Feb 16, 2017 at 6:28 PM, Trond Myklebust
>> <trondmy@primarydata.com> wrote:
>> > On Thu, 2017-02-16 at 17:14 -0500, Olga Kornievskaia wrote:
>> > > On Thu, Feb 16, 2017 at 4:45 PM, Trond Myklebust
>> > > <trondmy@primarydata.com> wrote:
>> > > >
>> > > > Olga, all your test is doing is showing that we hit the race
>> > > > more
>> > > > often. Fair enough, I’ll ask Anna to revert the patch. However
>> > > > reverting the patch won’t prevent the server from returning
>> > > > NFS4ERR_BAD_STATEID in any cases where the calls to
>> > > > nfs4_set_rw_stateid() happen before state recovery. This is why
>> > > > we
>> > > > have the loop in nfs42_proc_copy().
>> > >
>> > > I thought that if retry of the operation waits for the recovery
>> > > to
>> > > complete then nfs4_set_rw_stateid() will choose the correct
>> > > stateid,
>> > > is that not correct?
>> > >
>> > > If that's not correct, then we somehow need to separate the case
>> > > when
>> > > BAD_STATEID should indeed mark the locks lost vs this case where
>> > > the
>> > > code erroneously used the bad stateid (oops) and it should really
>> > > ignore this error. This really doesn't sound very plausible to
>> > > accomplish.
>> >
>> > Does this patch fix the problem?
>> >
>> > 8<-------------------------------------------------------------
>> > From bbae95e8f97cad6a91ca8caa50b61cae789632f9 Mon Sep 17 00:00:00
>> > 2001
>> > From: Trond Myklebust <trond.myklebust@primarydata.com>
>> > Date: Thu, 16 Feb 2017 18:14:38 -0500
>> > Subject: [PATCH] NFSv4: Fix reboot recovery in copy offload
>> >
>> > Copy offload code needs to be hooked into the code for handling
>> > NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field
>> > in struct nfs4_exception.
>> >
>> > Reported-by: Olga Kornievskaia <aglo@umich.edu>
>> > Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation")
>> > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> > ---
>> >  fs/nfs/nfs42proc.c | 57 +++++++++++++++++++++++++++++++-----------
>> > ------------
>> >  1 file changed, 33 insertions(+), 24 deletions(-)
>> >
>> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
>> > index d12ff9385f49..baf1fe2dc296 100644
>> > --- a/fs/nfs/nfs42proc.c
>> > +++ b/fs/nfs/nfs42proc.c
>> > @@ -128,20 +128,13 @@ int nfs42_proc_deallocate(struct file *filep,
>> > loff_t offset, loff_t len)
>> >         return err;
>> >  }
>> >
>> > -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
>> > +static ssize_t _nfs42_proc_copy(struct file *src,
>> >                                 struct nfs_lock_context *src_lock,
>> > -                               struct file *dst, loff_t pos_dst,
>> > +                               struct file *dst,
>> >                                 struct nfs_lock_context *dst_lock,
>> > -                               size_t count)
>> > +                               struct nfs42_copy_args *args,
>> > +                               struct nfs42_copy_res *res)
>> >  {
>> > -       struct nfs42_copy_args args = {
>> > -               .src_fh         = NFS_FH(file_inode(src)),
>> > -               .src_pos        = pos_src,
>> > -               .dst_fh         = NFS_FH(file_inode(dst)),
>> > -               .dst_pos        = pos_dst,
>> > -               .count          = count,
>> > -       };
>> > -       struct nfs42_copy_res res;
>> >         struct rpc_message msg = {
>> >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
>> >                 .rpc_argp = &args,
>> > @@ -149,9 +142,12 @@ static ssize_t _nfs42_proc_copy(struct file
>> > *src, loff_t pos_src,
>> >         };
>> >         struct inode *dst_inode = file_inode(dst);
>> >         struct nfs_server *server = NFS_SERVER(dst_inode);
>> > +       loff_t pos_src = args->src_pos;
>> > +       loff_t pos_dst = args->dst_pos;
>> > +       size_t count = args->count;
>> >         int status;
>> >
>> > -       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock-
>> > >open_context,
>> > +       status = nfs4_set_rw_stateid(&args->src_stateid, src_lock-
>> > >open_context,
>> >                                      src_lock, FMODE_READ);
>> >         if (status)
>> >                 return status;
>> > @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file
>> > *src, loff_t pos_src,
>> >         if (status)
>> >                 return status;
>> >
>> > -       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock-
>> > >open_context,
>> > +       status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock-
>> > >open_context,
>> >                                      dst_lock, FMODE_WRITE);
>> >         if (status)
>> >                 return status;
>> > @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file
>> > *src, loff_t pos_src,
>> >                 return status;
>> >
>> >         status = nfs4_call_sync(server->client, server, &msg,
>> > -                               &args.seq_args, &res.seq_res, 0);
>> > +                               &args->seq_args, &res->seq_res, 0);
>> >         if (status == -ENOTSUPP)
>> >                 server->caps &= ~NFS_CAP_COPY;
>> >         if (status)
>> >                 return status;
>> >
>> > -       if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
>> > -               status = nfs_commit_file(dst,
>> > &res.write_res.verifier.verifier);
>> > +       if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
>> > +               status = nfs_commit_file(dst, &res-
>> > >write_res.verifier.verifier);
>> >                 if (status)
>> >                         return status;
>> >         }
>> >
>> >         truncate_pagecache_range(dst_inode, pos_dst,
>> > -                                pos_dst + res.write_res.count);
>> > +                                pos_dst + res->write_res.count);
>> >
>> > -       return res.write_res.count;
>> > +       return res->write_res.count;
>> >  }
>> >
>> >  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
>> > @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src,
>> > loff_t pos_src,
>> >         struct nfs_server *server = NFS_SERVER(file_inode(dst));
>> >         struct nfs_lock_context *src_lock;
>> >         struct nfs_lock_context *dst_lock;
>> > -       struct nfs4_exception src_exception = { };
>> > -       struct nfs4_exception dst_exception = { };
>> > +       struct nfs42_copy_args args = {
>> > +               .src_fh         = NFS_FH(file_inode(src)),
>> > +               .src_pos        = pos_src,
>> > +               .dst_fh         = NFS_FH(file_inode(dst)),
>> > +               .dst_pos        = pos_dst,
>> > +               .count          = count,
>> > +       };
>> > +       struct nfs42_copy_res res;
>> > +       struct nfs4_exception src_exception = {
>> > +               .inode          = file_inode(src),
>> > +               .stateid        = &args.src_stateid,
>> > +       };
>> > +       struct nfs4_exception dst_exception = {
>> > +               .inode          = file_inode(dst),
>> > +               .stateid        = &args.dst_stateid,
>> > +       };
>> >         ssize_t err, err2;
>> >
>> >         if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
>> > @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src,
>> > loff_t pos_src,
>> >         if (IS_ERR(src_lock))
>> >                 return PTR_ERR(src_lock);
>> >
>> > -       src_exception.inode = file_inode(src);
>> >         src_exception.state = src_lock->open_context->state;
>> >
>> >         dst_lock =
>> > nfs_get_lock_context(nfs_file_open_context(dst));
>> > @@ -216,13 +225,13 @@ ssize_t nfs42_proc_copy(struct file *src,
>> > loff_t pos_src,
>> >                 goto out_put_src_lock;
>> >         }
>> >
>> > -       dst_exception.inode = file_inode(dst);
>> >         dst_exception.state = dst_lock->open_context->state;
>> >
>> >         do {
>> >                 inode_lock(file_inode(dst));
>> > -               err = _nfs42_proc_copy(src, pos_src, src_lock,
>> > -                                      dst, pos_dst, dst_lock,
>> > count);
>> > +               err = _nfs42_proc_copy(src, src_lock,
>> > +                               dst, dst_lock,
>> > +                               &args, &res);
>> >                 inode_unlock(file_inode(dst));
>> >
>> >                 if (err == -ENOTSUPP) {
>> > --
>> > 2.9.3
>>
>> I wish it did but no it does not.
>>
>
> So what is still happening? With this patch, the error handler should
> be able to distinguish between a stateid that is up to date and one
> that isn't.
>
> There might, however, still be a problem because we have 2 stateids,
> meaning that one could be stale (and generating NFS4ERR_BAD_STATEID)
> and the other one not. We might have to special case that, and do the
> comparisons inside nfs42_proc_copy instead of using the generic code in
> nfs4_handle_exception().

After COPY gets the BAD_STATEID (on the old stateids), I see 4
TEST_STATEIDs sent with 3 distinct stateids which are the new open
stateid for the src file, the locking stateid for the source file and
the new old stateid for the destination file. All of which are ok.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia Feb. 17, 2017, 3:22 p.m. UTC | #4
On Fri, Feb 17, 2017 at 10:07 AM, Olga Kornievskaia <aglo@umich.edu> wrote:
> On Fri, Feb 17, 2017 at 9:58 AM, Trond Myklebust
> <trondmy@primarydata.com> wrote:
>> On Fri, 2017-02-17 at 09:46 -0500, Olga Kornievskaia wrote:
>>> On Thu, Feb 16, 2017 at 6:28 PM, Trond Myklebust
>>> <trondmy@primarydata.com> wrote:
>>> > On Thu, 2017-02-16 at 17:14 -0500, Olga Kornievskaia wrote:
>>> > > On Thu, Feb 16, 2017 at 4:45 PM, Trond Myklebust
>>> > > <trondmy@primarydata.com> wrote:
>>> > > >
>>> > > > Olga, all your test is doing is showing that we hit the race
>>> > > > more
>>> > > > often. Fair enough, I’ll ask Anna to revert the patch. However
>>> > > > reverting the patch won’t prevent the server from returning
>>> > > > NFS4ERR_BAD_STATEID in any cases where the calls to
>>> > > > nfs4_set_rw_stateid() happen before state recovery. This is why
>>> > > > we
>>> > > > have the loop in nfs42_proc_copy().
>>> > >
>>> > > I thought that if retry of the operation waits for the recovery
>>> > > to
>>> > > complete then nfs4_set_rw_stateid() will choose the correct
>>> > > stateid,
>>> > > is that not correct?
>>> > >
>>> > > If that's not correct, then we somehow need to separate the case
>>> > > when
>>> > > BAD_STATEID should indeed mark the locks lost vs this case where
>>> > > the
>>> > > code erroneously used the bad stateid (oops) and it should really
>>> > > ignore this error. This really doesn't sound very plausible to
>>> > > accomplish.
>>> >
>>> > Does this patch fix the problem?
>>> >
>>> > 8<-------------------------------------------------------------
>>> > From bbae95e8f97cad6a91ca8caa50b61cae789632f9 Mon Sep 17 00:00:00
>>> > 2001
>>> > From: Trond Myklebust <trond.myklebust@primarydata.com>
>>> > Date: Thu, 16 Feb 2017 18:14:38 -0500
>>> > Subject: [PATCH] NFSv4: Fix reboot recovery in copy offload
>>> >
>>> > Copy offload code needs to be hooked into the code for handling
>>> > NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field
>>> > in struct nfs4_exception.
>>> >
>>> > Reported-by: Olga Kornievskaia <aglo@umich.edu>
>>> > Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation")
>>> > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>>> > ---
>>> >  fs/nfs/nfs42proc.c | 57 +++++++++++++++++++++++++++++++-----------
>>> > ------------
>>> >  1 file changed, 33 insertions(+), 24 deletions(-)
>>> >
>>> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
>>> > index d12ff9385f49..baf1fe2dc296 100644
>>> > --- a/fs/nfs/nfs42proc.c
>>> > +++ b/fs/nfs/nfs42proc.c
>>> > @@ -128,20 +128,13 @@ int nfs42_proc_deallocate(struct file *filep,
>>> > loff_t offset, loff_t len)
>>> >         return err;
>>> >  }
>>> >
>>> > -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
>>> > +static ssize_t _nfs42_proc_copy(struct file *src,
>>> >                                 struct nfs_lock_context *src_lock,
>>> > -                               struct file *dst, loff_t pos_dst,
>>> > +                               struct file *dst,
>>> >                                 struct nfs_lock_context *dst_lock,
>>> > -                               size_t count)
>>> > +                               struct nfs42_copy_args *args,
>>> > +                               struct nfs42_copy_res *res)
>>> >  {
>>> > -       struct nfs42_copy_args args = {
>>> > -               .src_fh         = NFS_FH(file_inode(src)),
>>> > -               .src_pos        = pos_src,
>>> > -               .dst_fh         = NFS_FH(file_inode(dst)),
>>> > -               .dst_pos        = pos_dst,
>>> > -               .count          = count,
>>> > -       };
>>> > -       struct nfs42_copy_res res;
>>> >         struct rpc_message msg = {
>>> >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
>>> >                 .rpc_argp = &args,
>>> > @@ -149,9 +142,12 @@ static ssize_t _nfs42_proc_copy(struct file
>>> > *src, loff_t pos_src,
>>> >         };
>>> >         struct inode *dst_inode = file_inode(dst);
>>> >         struct nfs_server *server = NFS_SERVER(dst_inode);
>>> > +       loff_t pos_src = args->src_pos;
>>> > +       loff_t pos_dst = args->dst_pos;
>>> > +       size_t count = args->count;
>>> >         int status;
>>> >
>>> > -       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock-
>>> > >open_context,
>>> > +       status = nfs4_set_rw_stateid(&args->src_stateid, src_lock-
>>> > >open_context,
>>> >                                      src_lock, FMODE_READ);
>>> >         if (status)
>>> >                 return status;
>>> > @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file
>>> > *src, loff_t pos_src,
>>> >         if (status)
>>> >                 return status;
>>> >
>>> > -       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock-
>>> > >open_context,
>>> > +       status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock-
>>> > >open_context,
>>> >                                      dst_lock, FMODE_WRITE);
>>> >         if (status)
>>> >                 return status;
>>> > @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file
>>> > *src, loff_t pos_src,
>>> >                 return status;
>>> >
>>> >         status = nfs4_call_sync(server->client, server, &msg,
>>> > -                               &args.seq_args, &res.seq_res, 0);
>>> > +                               &args->seq_args, &res->seq_res, 0);
>>> >         if (status == -ENOTSUPP)
>>> >                 server->caps &= ~NFS_CAP_COPY;
>>> >         if (status)
>>> >                 return status;
>>> >
>>> > -       if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
>>> > -               status = nfs_commit_file(dst,
>>> > &res.write_res.verifier.verifier);
>>> > +       if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
>>> > +               status = nfs_commit_file(dst, &res-
>>> > >write_res.verifier.verifier);
>>> >                 if (status)
>>> >                         return status;
>>> >         }
>>> >
>>> >         truncate_pagecache_range(dst_inode, pos_dst,
>>> > -                                pos_dst + res.write_res.count);
>>> > +                                pos_dst + res->write_res.count);
>>> >
>>> > -       return res.write_res.count;
>>> > +       return res->write_res.count;
>>> >  }
>>> >
>>> >  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
>>> > @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src,
>>> > loff_t pos_src,
>>> >         struct nfs_server *server = NFS_SERVER(file_inode(dst));
>>> >         struct nfs_lock_context *src_lock;
>>> >         struct nfs_lock_context *dst_lock;
>>> > -       struct nfs4_exception src_exception = { };
>>> > -       struct nfs4_exception dst_exception = { };
>>> > +       struct nfs42_copy_args args = {
>>> > +               .src_fh         = NFS_FH(file_inode(src)),
>>> > +               .src_pos        = pos_src,
>>> > +               .dst_fh         = NFS_FH(file_inode(dst)),
>>> > +               .dst_pos        = pos_dst,
>>> > +               .count          = count,
>>> > +       };
>>> > +       struct nfs42_copy_res res;
>>> > +       struct nfs4_exception src_exception = {
>>> > +               .inode          = file_inode(src),
>>> > +               .stateid        = &args.src_stateid,
>>> > +       };
>>> > +       struct nfs4_exception dst_exception = {
>>> > +               .inode          = file_inode(dst),
>>> > +               .stateid        = &args.dst_stateid,
>>> > +       };
>>> >         ssize_t err, err2;
>>> >
>>> >         if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
>>> > @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src,
>>> > loff_t pos_src,
>>> >         if (IS_ERR(src_lock))
>>> >                 return PTR_ERR(src_lock);
>>> >
>>> > -       src_exception.inode = file_inode(src);
>>> >         src_exception.state = src_lock->open_context->state;
>>> >
>>> >         dst_lock =
>>> > nfs_get_lock_context(nfs_file_open_context(dst));
>>> > @@ -216,13 +225,13 @@ ssize_t nfs42_proc_copy(struct file *src,
>>> > loff_t pos_src,
>>> >                 goto out_put_src_lock;
>>> >         }
>>> >
>>> > -       dst_exception.inode = file_inode(dst);
>>> >         dst_exception.state = dst_lock->open_context->state;
>>> >
>>> >         do {
>>> >                 inode_lock(file_inode(dst));
>>> > -               err = _nfs42_proc_copy(src, pos_src, src_lock,
>>> > -                                      dst, pos_dst, dst_lock,
>>> > count);
>>> > +               err = _nfs42_proc_copy(src, src_lock,
>>> > +                               dst, dst_lock,
>>> > +                               &args, &res);
>>> >                 inode_unlock(file_inode(dst));
>>> >
>>> >                 if (err == -ENOTSUPP) {
>>> > --
>>> > 2.9.3
>>>
>>> I wish it did but no it does not.
>>>
>>
>> So what is still happening? With this patch, the error handler should
>> be able to distinguish between a stateid that is up to date and one
>> that isn't.
>>
>> There might, however, still be a problem because we have 2 stateids,
>> meaning that one could be stale (and generating NFS4ERR_BAD_STATEID)
>> and the other one not. We might have to special case that, and do the
>> comparisons inside nfs42_proc_copy instead of using the generic code in
>> nfs4_handle_exception().
>
> After COPY gets the BAD_STATEID (on the old stateids), I see 4
> TEST_STATEIDs sent with 3 distinct stateids which are the new open
> stateid for the src file, the locking stateid for the source file and
> the new old stateid for the destination file. All of which are ok.

Urgh. sorry please wait. Last time I booted into the wrong kernel. You
patch currently oops. I need to fix that first. There is HOPE now it
works.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d12ff9385f49..baf1fe2dc296 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -128,20 +128,13 @@  int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
 	return err;
 }
 
-static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+static ssize_t _nfs42_proc_copy(struct file *src,
 				struct nfs_lock_context *src_lock,
-				struct file *dst, loff_t pos_dst,
+				struct file *dst,
 				struct nfs_lock_context *dst_lock,
-				size_t count)
+				struct nfs42_copy_args *args,
+				struct nfs42_copy_res *res)
 {
-	struct nfs42_copy_args args = {
-		.src_fh		= NFS_FH(file_inode(src)),
-		.src_pos	= pos_src,
-		.dst_fh		= NFS_FH(file_inode(dst)),
-		.dst_pos	= pos_dst,
-		.count		= count,
-	};
-	struct nfs42_copy_res res;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
 		.rpc_argp = &args,
@@ -149,9 +142,12 @@  static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
 	};
 	struct inode *dst_inode = file_inode(dst);
 	struct nfs_server *server = NFS_SERVER(dst_inode);
+	loff_t pos_src = args->src_pos;
+	loff_t pos_dst = args->dst_pos;
+	size_t count = args->count;
 	int status;
 
-	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+	status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
 				     src_lock, FMODE_READ);
 	if (status)
 		return status;
@@ -161,7 +157,7 @@  static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
 	if (status)
 		return status;
 
-	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+	status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
 				     dst_lock, FMODE_WRITE);
 	if (status)
 		return status;
@@ -171,22 +167,22 @@  static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
 		return status;
 
 	status = nfs4_call_sync(server->client, server, &msg,
-				&args.seq_args, &res.seq_res, 0);
+				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
 		server->caps &= ~NFS_CAP_COPY;
 	if (status)
 		return status;
 
-	if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
-		status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
+	if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
+		status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
 		if (status)
 			return status;
 	}
 
 	truncate_pagecache_range(dst_inode, pos_dst,
-				 pos_dst + res.write_res.count);
+				 pos_dst + res->write_res.count);
 
-	return res.write_res.count;
+	return res->write_res.count;
 }
 
 ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -196,8 +192,22 @@  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 	struct nfs_server *server = NFS_SERVER(file_inode(dst));
 	struct nfs_lock_context *src_lock;
 	struct nfs_lock_context *dst_lock;
-	struct nfs4_exception src_exception = { };
-	struct nfs4_exception dst_exception = { };
+	struct nfs42_copy_args args = {
+		.src_fh		= NFS_FH(file_inode(src)),
+		.src_pos	= pos_src,
+		.dst_fh		= NFS_FH(file_inode(dst)),
+		.dst_pos	= pos_dst,
+		.count		= count,
+	};
+	struct nfs42_copy_res res;
+	struct nfs4_exception src_exception = {
+		.inode		= file_inode(src),
+		.stateid	= &args.src_stateid,
+	};
+	struct nfs4_exception dst_exception = {
+		.inode		= file_inode(dst),
+		.stateid	= &args.dst_stateid,
+	};
 	ssize_t err, err2;
 
 	if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
@@ -207,7 +217,6 @@  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 	if (IS_ERR(src_lock))
 		return PTR_ERR(src_lock);
 
-	src_exception.inode = file_inode(src);
 	src_exception.state = src_lock->open_context->state;
 
 	dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
@@ -216,13 +225,13 @@  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 		goto out_put_src_lock;
 	}
 
-	dst_exception.inode = file_inode(dst);
 	dst_exception.state = dst_lock->open_context->state;
 
 	do {
 		inode_lock(file_inode(dst));
-		err = _nfs42_proc_copy(src, pos_src, src_lock,
-				       dst, pos_dst, dst_lock, count);
+		err = _nfs42_proc_copy(src, src_lock,
+				dst, dst_lock,
+				&args, &res);
 		inode_unlock(file_inode(dst));
 
 		if (err == -ENOTSUPP) {