diff mbox

[1/2] NFSv4: Fix CLOSE races with OPEN

Message ID 1479140396-17779-2-git-send-email-trond.myklebust@primarydata.com
State New
Headers show

Commit Message

Trond Myklebust Nov. 14, 2016, 4:19 p.m. UTC
If the reply to a successful CLOSE call races with an OPEN to the same
file, we can end up scribbling over the stateid that represents the
new open state.
The race looks like:

  Client				Server
  ======				======

  CLOSE stateid A on file "foo"
					CLOSE stateid A, return stateid C
  OPEN file "foo"
					OPEN "foo", return stateid B
  Receive reply to OPEN
  Reset open state for "foo"
  Associate stateid B to "foo"

  Receive CLOSE for A
  Reset open state for "foo"
  Replace stateid B with C

The fix is to examine the argument of the CLOSE, and check for a match
with the current stateid "other" field. If the two do not match, then
the above race occurred, and we should just ignore the CLOSE.

Reported-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4_fs.h  |  7 +++++++
 fs/nfs/nfs4proc.c | 12 ++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

Comments

Jeff Layton Nov. 14, 2016, 4:40 p.m. UTC | #1
On Mon, 2016-11-14 at 11:19 -0500, Trond Myklebust wrote:
> If the reply to a successful CLOSE call races with an OPEN to the same
> file, we can end up scribbling over the stateid that represents the
> new open state.
> The race looks like:
> 
>   Client				Server
>   ======				======
> 
>   CLOSE stateid A on file "foo"
> 					CLOSE stateid A, return stateid C
>   OPEN file "foo"
> 					OPEN "foo", return stateid B
>   Receive reply to OPEN
>   Reset open state for "foo"
>   Associate stateid B to "foo"
> 
>   Receive CLOSE for A
>   Reset open state for "foo"
>   Replace stateid B with C
> 
> The fix is to examine the argument of the CLOSE, and check for a match
> with the current stateid "other" field. If the two do not match, then
> the above race occurred, and we should just ignore the CLOSE.
> 
> Reported-by: Benjamin Coddington <bcodding@redhat.com>
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/nfs4_fs.h  |  7 +++++++
>  fs/nfs/nfs4proc.c | 12 ++++++------
>  2 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index 9b3a82abab07..1452177c822d 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
>  	return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
>  }
>  
> +static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
> +		const nfs4_stateid *stateid)
> +{
> +	return test_bit(NFS_OPEN_STATE, &state->flags) &&
> +		nfs4_stateid_match_other(&state->open_stateid, stateid);
> +}
> +
>  #else
>  
>  #define nfs4_close_state(a, b) do { } while (0)
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index f550ac69ffa0..b7b0080977c0 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -1458,7 +1458,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
>  }
>  
>  static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
> -		nfs4_stateid *arg_stateid,
>  		nfs4_stateid *stateid, fmode_t fmode)
>  {
>  	clear_bit(NFS_O_RDWR_STATE, &state->flags);
> @@ -1476,10 +1475,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
>  	}
>  	if (stateid == NULL)
>  		return;
> -	/* Handle races with OPEN */
> -	if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
> -	    (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
> -	    !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
> +	/* Handle OPEN+OPEN_DOWNGRADE races */
> +	if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
> +	    !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
>  		nfs_resync_open_stateid_locked(state);
>  		return;
>  	}
> @@ -1493,7 +1491,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
>  	nfs4_stateid *stateid, fmode_t fmode)
>  {
>  	write_seqlock(&state->seqlock);
> -	nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
> +	/* Ignore, if the CLOSE argment doesn't match the current stateid */
> +	if (nfs4_state_match_open_stateid_other(state, arg_stateid))
> +		nfs_clear_open_stateid_locked(state, stateid, fmode);
>  	write_sequnlock(&state->seqlock);
>  	if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
>  		nfs4_schedule_state_manager(state->owner->so_server->nfs_client);

I still don't quite get it. What's the point of paying any attention at
all to the stateid returned by the server in a CLOSE response? It's
either:

a) completely bogus, if the server is following the SHOULD in RFC5661,
section 18.2.4

...or...

b) refers to a now-defunct stateid -- probably a later version of the
one sent in the request, but the spec doesn't really spell that out,
AFAICT.

In either case, I don't think it ought to be trusted. Why not just use
the arg_stateid universally here?
Trond Myklebust Nov. 14, 2016, 4:48 p.m. UTC | #2
On Mon, 2016-11-14 at 11:40 -0500, Jeff Layton wrote:
> On Mon, 2016-11-14 at 11:19 -0500, Trond Myklebust wrote:

> > 

> > If the reply to a successful CLOSE call races with an OPEN to the

> > same

> > file, we can end up scribbling over the stateid that represents the

> > new open state.

> > The race looks like:

> > 

> >   Client				Server

> >   ======				======

> > 

> >   CLOSE stateid A on file "foo"

> > 					CLOSE stateid A, return stateid

> > C

> >   OPEN file "foo"

> > 					OPEN "foo", return stateid B

> >   Receive reply to OPEN

> >   Reset open state for "foo"

> >   Associate stateid B to "foo"

> > 

> >   Receive CLOSE for A

> >   Reset open state for "foo"

> >   Replace stateid B with C

> > 

> > The fix is to examine the argument of the CLOSE, and check for a

> > match

> > with the current stateid "other" field. If the two do not match,

> > then

> > the above race occurred, and we should just ignore the CLOSE.

> > 

> > Reported-by: Benjamin Coddington <bcodding@redhat.com>

> > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

> > ---

> >  fs/nfs/nfs4_fs.h  |  7 +++++++

> >  fs/nfs/nfs4proc.c | 12 ++++++------

> >  2 files changed, 13 insertions(+), 6 deletions(-)

> > 

> > diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h

> > index 9b3a82abab07..1452177c822d 100644

> > --- a/fs/nfs/nfs4_fs.h

> > +++ b/fs/nfs/nfs4_fs.h

> > @@ -542,6 +542,13 @@ static inline bool

> > nfs4_valid_open_stateid(const struct nfs4_state *state)

> >  	return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags)

> > == 0;

> >  }

> >  

> > +static inline bool nfs4_state_match_open_stateid_other(const

> > struct nfs4_state *state,

> > +		const nfs4_stateid *stateid)

> > +{

> > +	return test_bit(NFS_OPEN_STATE, &state->flags) &&

> > +		nfs4_stateid_match_other(&state->open_stateid,

> > stateid);

> > +}

> > +

> >  #else

> >  

> >  #define nfs4_close_state(a, b) do { } while (0)

> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

> > index f550ac69ffa0..b7b0080977c0 100644

> > --- a/fs/nfs/nfs4proc.c

> > +++ b/fs/nfs/nfs4proc.c

> > @@ -1458,7 +1458,6 @@ static void

> > nfs_resync_open_stateid_locked(struct nfs4_state *state)

> >  }

> >  

> >  static void nfs_clear_open_stateid_locked(struct nfs4_state

> > *state,

> > -		nfs4_stateid *arg_stateid,

> >  		nfs4_stateid *stateid, fmode_t fmode)

> >  {

> >  	clear_bit(NFS_O_RDWR_STATE, &state->flags);

> > @@ -1476,10 +1475,9 @@ static void

> > nfs_clear_open_stateid_locked(struct nfs4_state *state,

> >  	}

> >  	if (stateid == NULL)

> >  		return;

> > -	/* Handle races with OPEN */

> > -	if (!nfs4_stateid_match_other(arg_stateid, &state-

> > >open_stateid) ||

> > -	    (nfs4_stateid_match_other(stateid, &state-

> > >open_stateid) &&

> > -	    !nfs4_stateid_is_newer(stateid, &state-

> > >open_stateid))) {

> > +	/* Handle OPEN+OPEN_DOWNGRADE races */

> > +	if (nfs4_stateid_match_other(stateid, &state-

> > >open_stateid) &&

> > +	    !nfs4_stateid_is_newer(stateid, &state->open_stateid)) 

> > {

> >  		nfs_resync_open_stateid_locked(state);

> >  		return;

> >  	}

> > @@ -1493,7 +1491,9 @@ static void nfs_clear_open_stateid(struct

> > nfs4_state *state,

> >  	nfs4_stateid *stateid, fmode_t fmode)

> >  {

> >  	write_seqlock(&state->seqlock);

> > -	nfs_clear_open_stateid_locked(state, arg_stateid, stateid,

> > fmode);

> > +	/* Ignore, if the CLOSE argment doesn't match the current

> > stateid */

> > +	if (nfs4_state_match_open_stateid_other(state,

> > arg_stateid))

> > +		nfs_clear_open_stateid_locked(state, stateid,

> > fmode);

> >  	write_sequnlock(&state->seqlock);

> >  	if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))

> >  		nfs4_schedule_state_manager(state->owner-

> > >so_server->nfs_client);

> 

> I still don't quite get it. What's the point of paying any attention

> at

> all to the stateid returned by the server in a CLOSE response? It's

> either:

> 

> a) completely bogus, if the server is following the SHOULD in

> RFC5661,

> section 18.2.4

> 

> ...or...

> 

> b) refers to a now-defunct stateid -- probably a later version of the

> one sent in the request, but the spec doesn't really spell that out,

> AFAICT.

> 

> In either case, I don't think it ought to be trusted. Why not just

> use

> the arg_stateid universally here?

> 


The code path also has to handle OPEN_DOWNGRADE.
Olga Kornievskaia Feb. 13, 2018, 8:08 p.m. UTC | #3
On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
<trond.myklebust@primarydata.com> wrote:
> If the reply to a successful CLOSE call races with an OPEN to the same
> file, we can end up scribbling over the stateid that represents the
> new open state.
> The race looks like:
>
>   Client                                Server
>   ======                                ======
>
>   CLOSE stateid A on file "foo"
>                                         CLOSE stateid A, return stateid C

Hi folks,

I'd like to understand this particular issue. Specifically I don't
understand how can server return stateid C to the close with stateid
A.

As per RFC 7530 or 5661. It says that state is returned by the close
shouldn't be used.

Even though CLOSE returns a stateid, this stateid is not useful to
   the client and should be treated as deprecated.  CLOSE "shuts down"
   the state associated with all OPENs for the file by a single
   open-owner.  As noted above, CLOSE will either release all file
   locking state or return an error.  Therefore, the stateid returned by
   CLOSE is not useful for the operations that follow.

Is this because the spec says "should" and not a "must"?

Linux server increments a state's sequenceid on CLOSE. Ontap server
does not. I'm not sure what other servers do. Are all these
implementations equally correct?

>   OPEN file "foo"
>                                         OPEN "foo", return stateid B
>   Receive reply to OPEN
>   Reset open state for "foo"
>   Associate stateid B to "foo"
>
>   Receive CLOSE for A
>   Reset open state for "foo"
>   Replace stateid B with C
>
> The fix is to examine the argument of the CLOSE, and check for a match
> with the current stateid "other" field. If the two do not match, then
> the above race occurred, and we should just ignore the CLOSE.
>
> Reported-by: Benjamin Coddington <bcodding@redhat.com>
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  fs/nfs/nfs4_fs.h  |  7 +++++++
>  fs/nfs/nfs4proc.c | 12 ++++++------
>  2 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index 9b3a82abab07..1452177c822d 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
>         return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
>  }
>
> +static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
> +               const nfs4_stateid *stateid)
> +{
> +       return test_bit(NFS_OPEN_STATE, &state->flags) &&
> +               nfs4_stateid_match_other(&state->open_stateid, stateid);
> +}
> +
>  #else
>
>  #define nfs4_close_state(a, b) do { } while (0)
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index f550ac69ffa0..b7b0080977c0 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -1458,7 +1458,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
>  }
>
>  static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
> -               nfs4_stateid *arg_stateid,
>                 nfs4_stateid *stateid, fmode_t fmode)
>  {
>         clear_bit(NFS_O_RDWR_STATE, &state->flags);
> @@ -1476,10 +1475,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
>         }
>         if (stateid == NULL)
>                 return;
> -       /* Handle races with OPEN */
> -       if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
> -           (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
> -           !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
> +       /* Handle OPEN+OPEN_DOWNGRADE races */
> +       if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
> +           !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
>                 nfs_resync_open_stateid_locked(state);
>                 return;
>         }
> @@ -1493,7 +1491,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
>         nfs4_stateid *stateid, fmode_t fmode)
>  {
>         write_seqlock(&state->seqlock);
> -       nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
> +       /* Ignore, if the CLOSE argment doesn't match the current stateid */
> +       if (nfs4_state_match_open_stateid_other(state, arg_stateid))
> +               nfs_clear_open_stateid_locked(state, stateid, fmode);
>         write_sequnlock(&state->seqlock);
>         if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
>                 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Coddington Feb. 14, 2018, 3:05 p.m. UTC | #4
Hi Olga,

On 13 Feb 2018, at 15:08, Olga Kornievskaia wrote:

> On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
> <trond.myklebust@primarydata.com> wrote:
>> If the reply to a successful CLOSE call races with an OPEN to the same
>> file, we can end up scribbling over the stateid that represents the
>> new open state.
>> The race looks like:
>>
>>   Client                                Server
>>   ======                                ======
>>
>>   CLOSE stateid A on file "foo"
>>                                         CLOSE stateid A, return stateid C
>
> Hi folks,
>
> I'd like to understand this particular issue. Specifically I don't
> understand how can server return stateid C to the close with stateid
> A.

I think in this explanation of the race, stateid C is an incremented version
of A -- the stateid's "other" fields match -- OR it is the invalid special
stateid according to RFC 5661, Section 18.2.4.

> As per RFC 7530 or 5661. It says that state is returned by the close
> shouldn't be used.
>
> Even though CLOSE returns a stateid, this stateid is not useful to
>    the client and should be treated as deprecated.  CLOSE "shuts down"
>    the state associated with all OPENs for the file by a single
>    open-owner.  As noted above, CLOSE will either release all file
>    locking state or return an error.  Therefore, the stateid returned by
>    CLOSE is not useful for the operations that follow.
>
> Is this because the spec says "should" and not a "must"?

I don't understand - is what because?  The state returned from CLOSE is
useful to be used by the client for housekeeping, but it won't be re-used in
the protocol.

> Linux server increments a state's sequenceid on CLOSE. Ontap server
> does not. I'm not sure what other servers do. Are all these
> implementations equality correct?

Ah, good question there..  Even though the stateid is not useful for
operations that follow, I think the sequenceid should be incremented because
the CLOSE is an operation that modifies the set of locks or state:

In RFC 7530, Section 9.1.4.2.:
...
   When such a set of locks is first created, the server returns a
   stateid with a seqid value of one.  On subsequent operations that
   modify the set of locks, the server is required to advance the
   seqid field by one whenever it returns a stateid for the same
   state-owner/file/type combination and the operation is one that might
   make some change in the set of locks actually designated.  In this
   case, the server will return a stateid with an "other" field the same
   as previously used for that state-owner/file/type combination, with
   an incremented seqid field.

But, in RFC 5661, the CLOSE response SHOULD be the invalid special stateid.
I don't recall, does the linux server increment the existing stateid or send
back the invalid special stateid on v4.1?

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton Feb. 14, 2018, 3:21 p.m. UTC | #5
On Wed, 2018-02-14 at 10:05 -0500, Benjamin Coddington wrote:
> Hi Olga,
> 
> On 13 Feb 2018, at 15:08, Olga Kornievskaia wrote:
> 
> > On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
> > <trond.myklebust@primarydata.com> wrote:
> > > If the reply to a successful CLOSE call races with an OPEN to the same
> > > file, we can end up scribbling over the stateid that represents the
> > > new open state.
> > > The race looks like:
> > > 
> > >   Client                                Server
> > >   ======                                ======
> > > 
> > >   CLOSE stateid A on file "foo"
> > >                                         CLOSE stateid A, return stateid C
> > 
> > Hi folks,
> > 
> > I'd like to understand this particular issue. Specifically I don't
> > understand how can server return stateid C to the close with stateid
> > A.
> 
> I think in this explanation of the race, stateid C is an incremented version
> of A -- the stateid's "other" fields match -- OR it is the invalid special
> stateid according to RFC 5661, Section 18.2.4.
> 
> > As per RFC 7530 or 5661. It says that state is returned by the close
> > shouldn't be used.
> > 
> > Even though CLOSE returns a stateid, this stateid is not useful to
> >    the client and should be treated as deprecated.  CLOSE "shuts down"
> >    the state associated with all OPENs for the file by a single
> >    open-owner.  As noted above, CLOSE will either release all file
> >    locking state or return an error.  Therefore, the stateid returned by
> >    CLOSE is not useful for the operations that follow.
> > 
> > Is this because the spec says "should" and not a "must"?
> 
> I don't understand - is what because?  The state returned from CLOSE is
> useful to be used by the client for housekeeping, but it won't be re-used in
> the protocol.
> 
> > Linux server increments a state's sequenceid on CLOSE. Ontap server
> > does not. I'm not sure what other servers do. Are all these
> > implementations equality correct?
> 
> Ah, good question there..  Even though the stateid is not useful for
> operations that follow, I think the sequenceid should be incremented because
> the CLOSE is an operation that modifies the set of locks or state:
> 

It doesn't matter.

> In RFC 7530, Section 9.1.4.2.:
> ...
>    When such a set of locks is first created, the server returns a
>    stateid with a seqid value of one.  On subsequent operations that
>    modify the set of locks, the server is required to advance the
>    seqid field by one whenever it returns a stateid for the same
>    state-owner/file/type combination and the operation is one that might
>    make some change in the set of locks actually designated.  In this
>    case, the server will return a stateid with an "other" field the same
>    as previously used for that state-owner/file/type combination, with
>    an incremented seqid field.
> 
> But, in RFC 5661, the CLOSE response SHOULD be the invalid special stateid.
> I don't recall, does the linux server increment the existing stateid or send
> back the invalid special stateid on v4.1?
> 

A CLOSE, by definition, is destroying the old stateid, so what does it
matter what you return on success? It's bogus either way.

If knfsd is sending back a "real" stateid there, then it's likely only
because that's what the v4.0 spec said to do ~10 years ago. It's
probably fine to fix it to just return the invalid special stateid and
call it a day. All clients should just ignore it.
Trond Myklebust Feb. 14, 2018, 3:29 p.m. UTC | #6
On Wed, 2018-02-14 at 10:21 -0500, Jeff Layton wrote:
> On Wed, 2018-02-14 at 10:05 -0500, Benjamin Coddington wrote:

> > Hi Olga,

> > 

> > On 13 Feb 2018, at 15:08, Olga Kornievskaia wrote:

> > 

> > > On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust

> > > <trond.myklebust@primarydata.com> wrote:

> > > > If the reply to a successful CLOSE call races with an OPEN to

> > > > the same

> > > > file, we can end up scribbling over the stateid that represents

> > > > the

> > > > new open state.

> > > > The race looks like:

> > > > 

> > > >   Client                                Server

> > > >   ======                                ======

> > > > 

> > > >   CLOSE stateid A on file "foo"

> > > >                                         CLOSE stateid A, return

> > > > stateid C

> > > 

> > > Hi folks,

> > > 

> > > I'd like to understand this particular issue. Specifically I

> > > don't

> > > understand how can server return stateid C to the close with

> > > stateid

> > > A.

> > 

> > I think in this explanation of the race, stateid C is an

> > incremented version

> > of A -- the stateid's "other" fields match -- OR it is the invalid

> > special

> > stateid according to RFC 5661, Section 18.2.4.

> > 

> > > As per RFC 7530 or 5661. It says that state is returned by the

> > > close

> > > shouldn't be used.

> > > 

> > > Even though CLOSE returns a stateid, this stateid is not useful

> > > to

> > >    the client and should be treated as deprecated.  CLOSE "shuts

> > > down"

> > >    the state associated with all OPENs for the file by a single

> > >    open-owner.  As noted above, CLOSE will either release all

> > > file

> > >    locking state or return an error.  Therefore, the stateid

> > > returned by

> > >    CLOSE is not useful for the operations that follow.

> > > 

> > > Is this because the spec says "should" and not a "must"?

> > 

> > I don't understand - is what because?  The state returned from

> > CLOSE is

> > useful to be used by the client for housekeeping, but it won't be

> > re-used in

> > the protocol.

> > 

> > > Linux server increments a state's sequenceid on CLOSE. Ontap

> > > server

> > > does not. I'm not sure what other servers do. Are all these

> > > implementations equality correct?

> > 

> > Ah, good question there..  Even though the stateid is not useful

> > for

> > operations that follow, I think the sequenceid should be

> > incremented because

> > the CLOSE is an operation that modifies the set of locks or state:

> > 

> 

> It doesn't matter.

> 

> > In RFC 7530, Section 9.1.4.2.:

> > ...

> >    When such a set of locks is first created, the server returns a

> >    stateid with a seqid value of one.  On subsequent operations

> > that

> >    modify the set of locks, the server is required to advance the

> >    seqid field by one whenever it returns a stateid for the same

> >    state-owner/file/type combination and the operation is one that

> > might

> >    make some change in the set of locks actually designated.  In

> > this

> >    case, the server will return a stateid with an "other" field the

> > same

> >    as previously used for that state-owner/file/type combination,

> > with

> >    an incremented seqid field.

> > 

> > But, in RFC 5661, the CLOSE response SHOULD be the invalid special

> > stateid.

> > I don't recall, does the linux server increment the existing

> > stateid or send

> > back the invalid special stateid on v4.1?

> > 

> 

> A CLOSE, by definition, is destroying the old stateid, so what does

> it

> matter what you return on success? It's bogus either way.

> 

> If knfsd is sending back a "real" stateid there, then it's likely

> only

> because that's what the v4.0 spec said to do ~10 years ago. It's

> probably fine to fix it to just return the invalid special stateid

> and

> call it a day. All clients should just ignore it.

> 


Is there a proposal to change the current client behaviour here? As far
as I can tell, the answer is "no". Am I missing something?

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
Benjamin Coddington Feb. 14, 2018, 3:42 p.m. UTC | #7
On 14 Feb 2018, at 10:29, Trond Myklebust wrote:
> On Wed, 2018-02-14 at 10:21 -0500, Jeff Layton wrote:
>> On Wed, 2018-02-14 at 10:05 -0500, Benjamin Coddington wrote:
>>> Hi Olga,
>>>
>>> On 13 Feb 2018, at 15:08, Olga Kornievskaia wrote:
>>>
>>>> On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
>>>> <trond.myklebust@primarydata.com> wrote:
>>>> ...
>>> Ah, good question there..  Even though the stateid is not useful
>>> for
>>> operations that follow, I think the sequenceid should be
>>> incremented because
>>> the CLOSE is an operation that modifies the set of locks or state:
>>>
>>
>> It doesn't matter.

Yes, good condensed point.  It doesn't matter.

>>> ...
> Is there a proposal to change the current client behaviour here? As far
> as I can tell, the answer is "no". Am I missing something?

Not that I can see.  I think we're just comparing linux and netapp server
behaviors..

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia Feb. 14, 2018, 4:06 p.m. UTC | #8
On Wed, Feb 14, 2018 at 10:42 AM, Benjamin Coddington
<bcodding@redhat.com> wrote:
> On 14 Feb 2018, at 10:29, Trond Myklebust wrote:
>> On Wed, 2018-02-14 at 10:21 -0500, Jeff Layton wrote:
>>> On Wed, 2018-02-14 at 10:05 -0500, Benjamin Coddington wrote:
>>>> Hi Olga,
>>>>
>>>> On 13 Feb 2018, at 15:08, Olga Kornievskaia wrote:
>>>>
>>>>> On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
>>>>> <trond.myklebust@primarydata.com> wrote:
>>>>> ...
>>>> Ah, good question there..  Even though the stateid is not useful
>>>> for
>>>> operations that follow, I think the sequenceid should be
>>>> incremented because
>>>> the CLOSE is an operation that modifies the set of locks or state:
>>>>
>>>
>>> It doesn't matter.
>
> Yes, good condensed point.  It doesn't matter.
>
>>>> ...
>> Is there a proposal to change the current client behaviour here? As far
>> as I can tell, the answer is "no". Am I missing something?
>
> Not that I can see.  I think we're just comparing linux and netapp server
> behaviors..
>
> Ben

I just found very surprising that in nfs4_close_done() which calls
eventually calls nfs_clear_open_stateid_locked() we change the state
based on the stateid received from the CLOSE.
nfs_clear_open_stateid_locked() is only called from nfs4_close_done()
and no other function.

I guess I'm not wondering if we had this problem described in this
patch of the delayed CLOSE, if we didn't update the state after
receiving the close... (so yes this is a weak proposal).
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trond Myklebust Feb. 14, 2018, 4:59 p.m. UTC | #9
On Wed, 2018-02-14 at 11:06 -0500, Olga Kornievskaia wrote:
> On Wed, Feb 14, 2018 at 10:42 AM, Benjamin Coddington
> <bcodding@redhat.com> wrote:
> > On 14 Feb 2018, at 10:29, Trond Myklebust wrote:
> > > On Wed, 2018-02-14 at 10:21 -0500, Jeff Layton wrote:
> > > > On Wed, 2018-02-14 at 10:05 -0500, Benjamin Coddington wrote:
> > > > > Hi Olga,
> > > > > 
> > > > > On 13 Feb 2018, at 15:08, Olga Kornievskaia wrote:
> > > > > 
> > > > > > On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
> > > > > > <trond.myklebust@primarydata.com> wrote:
> > > > > > ...
> > > > > 
> > > > > Ah, good question there..  Even though the stateid is not
> > > > > useful
> > > > > for
> > > > > operations that follow, I think the sequenceid should be
> > > > > incremented because
> > > > > the CLOSE is an operation that modifies the set of locks or
> > > > > state:
> > > > > 
> > > > 
> > > > It doesn't matter.
> > 
> > Yes, good condensed point.  It doesn't matter.
> > 
> > > > > ...
> > > 
> > > Is there a proposal to change the current client behaviour here?
> > > As far
> > > as I can tell, the answer is "no". Am I missing something?
> > 
> > Not that I can see.  I think we're just comparing linux and netapp
> > server
> > behaviors..
> > 
> > Ben
> 
> I just found very surprising that in nfs4_close_done() which calls
> eventually calls nfs_clear_open_stateid_locked() we change the state
> based on the stateid received from the CLOSE.
> nfs_clear_open_stateid_locked() is only called from nfs4_close_done()
> and no other function.
> 
> I guess I'm not wondering if we had this problem described in this
> patch of the delayed CLOSE, if we didn't update the state after
> receiving the close... (so yes this is a weak proposal).
> 

nfs4_close_done() doesn't only deal with CLOSE. It also has to handle
OPEN_DOWNGRADE.

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tigran Mkrtchyan Feb. 15, 2018, 12:19 p.m. UTC | #10
----- Original Message -----
> From: "Olga Kornievskaia" <aglo@umich.edu>
> To: "Trond Myklebust" <trond.myklebust@primarydata.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Benjamin Coddington" <bcodding@redhat.com>, "Jeff Layton"
> <jlayton@redhat.com>
> Sent: Tuesday, February 13, 2018 9:08:01 PM
> Subject: Re: [PATCH 1/2] NFSv4: Fix CLOSE races with OPEN

> On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
> <trond.myklebust@primarydata.com> wrote:
>> If the reply to a successful CLOSE call races with an OPEN to the same
>> file, we can end up scribbling over the stateid that represents the
>> new open state.
>> The race looks like:
>>
>>   Client                                Server
>>   ======                                ======
>>
>>   CLOSE stateid A on file "foo"
>>                                         CLOSE stateid A, return stateid C
> 
> Hi folks,
> 
> I'd like to understand this particular issue. Specifically I don't
> understand how can server return stateid C to the close with stateid
> A.
> 
> As per RFC 7530 or 5661, the stateid returned by the close
> shouldn't be used.
> 
> Even though CLOSE returns a stateid, this stateid is not useful to
>   the client and should be treated as deprecated.  CLOSE "shuts down"
>   the state associated with all OPENs for the file by a single
>   open-owner.  As noted above, CLOSE will either release all file
>   locking state or return an error.  Therefore, the stateid returned by
>   CLOSE is not useful for the operations that follow.
> 
> Is this because the spec says "should" and not a "must"?
> 
> Linux server increments a state's sequenceid on CLOSE. Ontap server
> does not. I'm not sure what other servers do. Are all these


Our server sends back invalid state id for v4.1 and v4.0.

Tigran.

> implementations equally correct?
> 
>>   OPEN file "foo"
>>                                         OPEN "foo", return stateid B
>>   Receive reply to OPEN
>>   Reset open state for "foo"
>>   Associate stateid B to "foo"
>>
>>   Receive CLOSE for A
>>   Reset open state for "foo"
>>   Replace stateid B with C
>>
>> The fix is to examine the argument of the CLOSE, and check for a match
>> with the current stateid "other" field. If the two do not match, then
>> the above race occurred, and we should just ignore the CLOSE.
>>
>> Reported-by: Benjamin Coddington <bcodding@redhat.com>
>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> ---
>>  fs/nfs/nfs4_fs.h  |  7 +++++++
>>  fs/nfs/nfs4proc.c | 12 ++++++------
>>  2 files changed, 13 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
>> index 9b3a82abab07..1452177c822d 100644
>> --- a/fs/nfs/nfs4_fs.h
>> +++ b/fs/nfs/nfs4_fs.h
>> @@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct
>> nfs4_state *state)
>>         return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
>>  }
>>
>> +static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state
>> *state,
>> +               const nfs4_stateid *stateid)
>> +{
>> +       return test_bit(NFS_OPEN_STATE, &state->flags) &&
>> +               nfs4_stateid_match_other(&state->open_stateid, stateid);
>> +}
>> +
>>  #else
>>
>>  #define nfs4_close_state(a, b) do { } while (0)
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index f550ac69ffa0..b7b0080977c0 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -1458,7 +1458,6 @@ static void nfs_resync_open_stateid_locked(struct
>> nfs4_state *state)
>>  }
>>
>>  static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
>> -               nfs4_stateid *arg_stateid,
>>                 nfs4_stateid *stateid, fmode_t fmode)
>>  {
>>         clear_bit(NFS_O_RDWR_STATE, &state->flags);
>> @@ -1476,10 +1475,9 @@ static void nfs_clear_open_stateid_locked(struct
>> nfs4_state *state,
>>         }
>>         if (stateid == NULL)
>>                 return;
>> -       /* Handle races with OPEN */
>> -       if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
>> -           (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
>> -           !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
>> +       /* Handle OPEN+OPEN_DOWNGRADE races */
>> +       if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
>> +           !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
>>                 nfs_resync_open_stateid_locked(state);
>>                 return;
>>         }
>> @@ -1493,7 +1491,9 @@ static void nfs_clear_open_stateid(struct nfs4_state
>> *state,
>>         nfs4_stateid *stateid, fmode_t fmode)
>>  {
>>         write_seqlock(&state->seqlock);
>> -       nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
>> +       /* Ignore, if the CLOSE argment doesn't match the current stateid */
>> +       if (nfs4_state_match_open_stateid_other(state, arg_stateid))
>> +               nfs_clear_open_stateid_locked(state, stateid, fmode);
>>         write_sequnlock(&state->seqlock);
>>         if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
>>                 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
>> --
>> 2.7.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton Feb. 15, 2018, 12:23 p.m. UTC | #11
On Thu, 2018-02-15 at 13:19 +0100, Mkrtchyan, Tigran wrote:
> 
> ----- Original Message -----
> > From: "Olga Kornievskaia" <aglo@umich.edu>
> > To: "Trond Myklebust" <trond.myklebust@primarydata.com>
> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Benjamin Coddington" <bcodding@redhat.com>, "Jeff Layton"
> > <jlayton@redhat.com>
> > Sent: Tuesday, February 13, 2018 9:08:01 PM
> > Subject: Re: [PATCH 1/2] NFSv4: Fix CLOSE races with OPEN
> > On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
> > <trond.myklebust@primarydata.com> wrote:
> > > If the reply to a successful CLOSE call races with an OPEN to the same
> > > file, we can end up scribbling over the stateid that represents the
> > > new open state.
> > > The race looks like:
> > > 
> > >   Client                                Server
> > >   ======                                ======
> > > 
> > >   CLOSE stateid A on file "foo"
> > >                                         CLOSE stateid A, return stateid C
> > 
> > Hi folks,
> > 
> > I'd like to understand this particular issue. Specifically I don't
> > understand how can server return stateid C to the close with stateid
> > A.
> > 
> > As per RFC 7530 or 5661, the stateid returned by the close
> > shouldn't be used.
> > 
> > Even though CLOSE returns a stateid, this stateid is not useful to
> >   the client and should be treated as deprecated.  CLOSE "shuts down"
> >   the state associated with all OPENs for the file by a single
> >   open-owner.  As noted above, CLOSE will either release all file
> >   locking state or return an error.  Therefore, the stateid returned by
> >   CLOSE is not useful for the operations that follow.
> > 
> > Is this because the spec says "should" and not a "must"?
> > 
> > Linux server increments a state's sequenceid on CLOSE. Ontap server
> > does not. I'm not sure what other servers do. Are all these
> 
> 
> Our server sends back invalid state id for v4.1 and v4.0.
> 
> Tigran.
> 

That's probably the best thing to do, and we should probably do the same
for v4.0 in knfsd. Doing that might cause problems for clients that are
not ignoring that field like they should, but they're buggy already.

> > implementations equally correct?
> > 
> > >   OPEN file "foo"
> > >                                         OPEN "foo", return stateid B
> > >   Receive reply to OPEN
> > >   Reset open state for "foo"
> > >   Associate stateid B to "foo"
> > > 
> > >   Receive CLOSE for A
> > >   Reset open state for "foo"
> > >   Replace stateid B with C
> > > 
> > > The fix is to examine the argument of the CLOSE, and check for a match
> > > with the current stateid "other" field. If the two do not match, then
> > > the above race occurred, and we should just ignore the CLOSE.
> > > 
> > > Reported-by: Benjamin Coddington <bcodding@redhat.com>
> > > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> > > ---
> > >  fs/nfs/nfs4_fs.h  |  7 +++++++
> > >  fs/nfs/nfs4proc.c | 12 ++++++------
> > >  2 files changed, 13 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> > > index 9b3a82abab07..1452177c822d 100644
> > > --- a/fs/nfs/nfs4_fs.h
> > > +++ b/fs/nfs/nfs4_fs.h
> > > @@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct
> > > nfs4_state *state)
> > >         return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
> > >  }
> > > 
> > > +static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state
> > > *state,
> > > +               const nfs4_stateid *stateid)
> > > +{
> > > +       return test_bit(NFS_OPEN_STATE, &state->flags) &&
> > > +               nfs4_stateid_match_other(&state->open_stateid, stateid);
> > > +}
> > > +
> > >  #else
> > > 
> > >  #define nfs4_close_state(a, b) do { } while (0)
> > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > index f550ac69ffa0..b7b0080977c0 100644
> > > --- a/fs/nfs/nfs4proc.c
> > > +++ b/fs/nfs/nfs4proc.c
> > > @@ -1458,7 +1458,6 @@ static void nfs_resync_open_stateid_locked(struct
> > > nfs4_state *state)
> > >  }
> > > 
> > >  static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
> > > -               nfs4_stateid *arg_stateid,
> > >                 nfs4_stateid *stateid, fmode_t fmode)
> > >  {
> > >         clear_bit(NFS_O_RDWR_STATE, &state->flags);
> > > @@ -1476,10 +1475,9 @@ static void nfs_clear_open_stateid_locked(struct
> > > nfs4_state *state,
> > >         }
> > >         if (stateid == NULL)
> > >                 return;
> > > -       /* Handle races with OPEN */
> > > -       if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
> > > -           (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
> > > -           !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
> > > +       /* Handle OPEN+OPEN_DOWNGRADE races */
> > > +       if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
> > > +           !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
> > >                 nfs_resync_open_stateid_locked(state);
> > >                 return;
> > >         }
> > > @@ -1493,7 +1491,9 @@ static void nfs_clear_open_stateid(struct nfs4_state
> > > *state,
> > >         nfs4_stateid *stateid, fmode_t fmode)
> > >  {
> > >         write_seqlock(&state->seqlock);
> > > -       nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
> > > +       /* Ignore, if the CLOSE argment doesn't match the current stateid */
> > > +       if (nfs4_state_match_open_stateid_other(state, arg_stateid))
> > > +               nfs_clear_open_stateid_locked(state, stateid, fmode);
> > >         write_sequnlock(&state->seqlock);
> > >         if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
> > >                 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
> > > --
> > > 2.7.4
> > > 
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Feb. 15, 2018, 3:29 p.m. UTC | #12
On Thu, Feb 15, 2018 at 07:23:00AM -0500, Jeff Layton wrote:
> On Thu, 2018-02-15 at 13:19 +0100, Mkrtchyan, Tigran wrote:
> > 
> > ----- Original Message -----
> > > From: "Olga Kornievskaia" <aglo@umich.edu>
> > > To: "Trond Myklebust" <trond.myklebust@primarydata.com>
> > > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Benjamin Coddington" <bcodding@redhat.com>, "Jeff Layton"
> > > <jlayton@redhat.com>
> > > Sent: Tuesday, February 13, 2018 9:08:01 PM
> > > Subject: Re: [PATCH 1/2] NFSv4: Fix CLOSE races with OPEN
> > > On Mon, Nov 14, 2016 at 11:19 AM, Trond Myklebust
> > > <trond.myklebust@primarydata.com> wrote:
> > > > If the reply to a successful CLOSE call races with an OPEN to the same
> > > > file, we can end up scribbling over the stateid that represents the
> > > > new open state.
> > > > The race looks like:
> > > > 
> > > >   Client                                Server
> > > >   ======                                ======
> > > > 
> > > >   CLOSE stateid A on file "foo"
> > > >                                         CLOSE stateid A, return stateid C
> > > 
> > > Hi folks,
> > > 
> > > I'd like to understand this particular issue. Specifically I don't
> > > understand how can server return stateid C to the close with stateid
> > > A.
> > > 
> > > As per RFC 7530 or 5661, the stateid returned by the close
> > > shouldn't be used.
> > > 
> > > Even though CLOSE returns a stateid, this stateid is not useful to
> > >   the client and should be treated as deprecated.  CLOSE "shuts down"
> > >   the state associated with all OPENs for the file by a single
> > >   open-owner.  As noted above, CLOSE will either release all file
> > >   locking state or return an error.  Therefore, the stateid returned by
> > >   CLOSE is not useful for the operations that follow.
> > > 
> > > Is this because the spec says "should" and not a "must"?
> > > 
> > > Linux server increments a state's sequenceid on CLOSE. Ontap server
> > > does not. I'm not sure what other servers do. Are all these
> > 
> > 
> > Our server sends back invalid state id for v4.1 and v4.0.
> > 
> > Tigran.
> > 
> 
> That's probably the best thing to do, and we should probably do the same
>  for v4.0 in knfsd. Doing that might cause problems for clients that are
> not ignoring that field like they should, but they're buggy already.

Not only buggy in theory, but actually failing in practice, it sounds
like.  So, a pretty safe change?

Returning an all-zeroes stateid would be simple and make the situation
really obvious.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trond Myklebust Feb. 15, 2018, 3:37 p.m. UTC | #13
T24gVGh1LCAyMDE4LTAyLTE1IGF0IDEwOjI5IC0wNTAwLCBCcnVjZSBGaWVsZHMgd3JvdGU6DQo+
IE9uIFRodSwgRmViIDE1LCAyMDE4IGF0IDA3OjIzOjAwQU0gLTA1MDAsIEplZmYgTGF5dG9uIHdy
b3RlOg0KPiA+IE9uIFRodSwgMjAxOC0wMi0xNSBhdCAxMzoxOSArMDEwMCwgTWtydGNoeWFuLCBU
aWdyYW4gd3JvdGU6DQo+ID4gPiANCj4gPiA+IC0tLS0tIE9yaWdpbmFsIE1lc3NhZ2UgLS0tLS0N
Cj4gPiA+ID4gRnJvbTogIk9sZ2EgS29ybmlldnNrYWlhIiA8YWdsb0B1bWljaC5lZHU+DQo+ID4g
PiA+IFRvOiAiVHJvbmQgTXlrbGVidXN0IiA8dHJvbmQubXlrbGVidXN0QHByaW1hcnlkYXRhLmNv
bT4NCj4gPiA+ID4gQ2M6ICJsaW51eC1uZnMiIDxsaW51eC1uZnNAdmdlci5rZXJuZWwub3JnPiwg
IkJlbmphbWluDQo+ID4gPiA+IENvZGRpbmd0b24iIDxiY29kZGluZ0ByZWRoYXQuY29tPiwgIkpl
ZmYgTGF5dG9uIg0KPiA+ID4gPiA8amxheXRvbkByZWRoYXQuY29tPg0KPiA+ID4gPiBTZW50OiBU
dWVzZGF5LCBGZWJydWFyeSAxMywgMjAxOCA5OjA4OjAxIFBNDQo+ID4gPiA+IFN1YmplY3Q6IFJl
OiBbUEFUQ0ggMS8yXSBORlN2NDogRml4IENMT1NFIHJhY2VzIHdpdGggT1BFTg0KPiA+ID4gPiBP
biBNb24sIE5vdiAxNCwgMjAxNiBhdCAxMToxOSBBTSwgVHJvbmQgTXlrbGVidXN0DQo+ID4gPiA+
IDx0cm9uZC5teWtsZWJ1c3RAcHJpbWFyeWRhdGEuY29tPiB3cm90ZToNCj4gPiA+ID4gPiBJZiB0
aGUgcmVwbHkgdG8gYSBzdWNjZXNzZnVsIENMT1NFIGNhbGwgcmFjZXMgd2l0aCBhbiBPUEVOIHRv
DQo+ID4gPiA+ID4gdGhlIHNhbWUNCj4gPiA+ID4gPiBmaWxlLCB3ZSBjYW4gZW5kIHVwIHNjcmli
Ymxpbmcgb3ZlciB0aGUgc3RhdGVpZCB0aGF0DQo+ID4gPiA+ID4gcmVwcmVzZW50cyB0aGUNCj4g
PiA+ID4gPiBuZXcgb3BlbiBzdGF0ZS4NCj4gPiA+ID4gPiBUaGUgcmFjZSBsb29rcyBsaWtlOg0K
PiA+ID4gPiA+IA0KPiA+ID4gPiA+ICAgQ2xpZW50ICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICBTZXJ2ZXINCj4gPiA+ID4gPiAgID09PT09PSAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgPT09PT09DQo+ID4gPiA+ID4gDQo+ID4gPiA+ID4gICBDTE9TRSBzdGF0ZWlkIEEgb24g
ZmlsZSAiZm9vIg0KPiA+ID4gPiA+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICBDTE9TRSBzdGF0ZWlkIEEsDQo+ID4gPiA+ID4gcmV0dXJuIHN0YXRlaWQgQw0KPiA+ID4g
PiANCj4gPiA+ID4gSGkgZm9sa3MsDQo+ID4gPiA+IA0KPiA+ID4gPiBJJ2QgbGlrZSB0byB1bmRl
cnN0YW5kIHRoaXMgcGFydGljdWxhciBpc3N1ZS4gU3BlY2lmaWNhbGx5IEkNCj4gPiA+ID4gZG9u
J3QNCj4gPiA+ID4gdW5kZXJzdGFuZCBob3cgY2FuIHNlcnZlciByZXR1cm4gc3RhdGVpZCBDIHRv
IHRoZSBjbG9zZSB3aXRoDQo+ID4gPiA+IHN0YXRlaWQNCj4gPiA+ID4gQS4NCj4gPiA+ID4gDQo+
ID4gPiA+IEFzIHBlciBSRkMgNzUzMCBvciA1NjYxLiBJdCBzYXlzIHRoYXQgc3RhdGUgaXMgcmV0
dXJuZWQgYnkgdGhlDQo+ID4gPiA+IGNsb3NlDQo+ID4gPiA+IHNob3VsZG4ndCBiZSB1c2VkLg0K
PiA+ID4gPiANCj4gPiA+ID4gRXZlbiB0aG91Z2ggQ0xPU0UgcmV0dXJucyBhIHN0YXRlaWQsIHRo
aXMgc3RhdGVpZCBpcyBub3QgdXNlZnVsDQo+ID4gPiA+IHRvDQo+ID4gPiA+ICAgdGhlIGNsaWVu
dCBhbmQgc2hvdWxkIGJlIHRyZWF0ZWQgYXMgZGVwcmVjYXRlZC4gIENMT1NFICJzaHV0cw0KPiA+
ID4gPiBkb3duIg0KPiA+ID4gPiAgIHRoZSBzdGF0ZSBhc3NvY2lhdGVkIHdpdGggYWxsIE9QRU5z
IGZvciB0aGUgZmlsZSBieSBhIHNpbmdsZQ0KPiA+ID4gPiAgIG9wZW4tb3duZXIuICBBcyBub3Rl
ZCBhYm92ZSwgQ0xPU0Ugd2lsbCBlaXRoZXIgcmVsZWFzZSBhbGwNCj4gPiA+ID4gZmlsZQ0KPiA+
ID4gPiAgIGxvY2tpbmcgc3RhdGUgb3IgcmV0dXJuIGFuIGVycm9yLiAgVGhlcmVmb3JlLCB0aGUg
c3RhdGVpZA0KPiA+ID4gPiByZXR1cm5lZCBieQ0KPiA+ID4gPiAgIENMT1NFIGlzIG5vdCB1c2Vm
dWwgZm9yIHRoZSBvcGVyYXRpb25zIHRoYXQgZm9sbG93Lg0KPiA+ID4gPiANCj4gPiA+ID4gSXMg
dGhpcyBiZWNhdXNlIHRoZSBzcGVjIHNheXMgInNob3VsZCIgYW5kIG5vdCBhICJtdXN0Ij8NCj4g
PiA+ID4gDQo+ID4gPiA+IExpbnV4IHNlcnZlciBpbmNyZW1lbnRzIGEgc3RhdGUncyBzZXF1ZW5j
ZWlkIG9uIENMT1NFLiBPbnRhcA0KPiA+ID4gPiBzZXJ2ZXINCj4gPiA+ID4gZG9lcyBub3QuIEkn
bSBub3Qgc3VyZSB3aGF0IG90aGVyIHNlcnZlcnMgZG8uIEFyZSBhbGwgdGhlc2UNCj4gPiA+IA0K
PiA+ID4gDQo+ID4gPiBPdXIgc2VydmVyIHNlbmRzIGJhY2sgaW52YWxpZCBzdGF0ZSBpZCBmb3Ig
djQuMSBhbmQgdjQuMC4NCj4gPiA+IA0KPiA+ID4gVGlncmFuLg0KPiA+ID4gDQo+ID4gDQo+ID4g
VGhhdCdzIHByb2JhYmx5IHRoZSBiZXN0IHRoaW5nIHRvIGRvLCBhbmQgd2Ugc2hvdWxkIHByb2Jh
Ymx5IGRvIHRoZQ0KPiA+IHNhbWUNCj4gPiAgZm9yIHY0LjAgaW4ga25mc2QuIERvaW5nIHRoYXQg
bWlnaHQgY2F1c2UgcHJvYmxlbXMgZm9yIGNsaWVudHMNCj4gPiB0aGF0IGFyZQ0KPiA+IG5vdCBp
Z25vcmluZyB0aGF0IGZpZWxkIGxpa2UgdGhleSBzaG91bGQsIGJ1dCB0aGV5J3JlIGJ1Z2d5DQo+
ID4gYWxyZWFkeS4NCj4gDQo+IE5vdCBvbmx5IGJ1Z2d5IGluIHRoZW9yeSwgYnV0IGFjdHVhbGx5
IGZhaWxpbmcgaW4gcHJhY3RpY2UsIGl0IHNvdW5kcw0KPiBsaWtlLiAgU28sIGEgcHJldHR5IHNh
ZmUgY2hhbmdlPw0KPiANCj4gUmV0dXJuaW5nIGFuIGFsbC16ZXJvZXMgc3RhdGVpZCB3b3VsZCBi
ZSBzaW1wbGUgYW5kIG1ha2UgdGhlDQo+IHNpdHVhdGlvbg0KPiByZWFsbHkgb2J2aW91cy4NCj4g
DQoNCklmIHlvdSdyZSBnb2luZyB0byBkbyB0aGF0LCB0aGVuIHdoeSBub3QganVzdCBleHBhbmQg
ZmI1MDBhN2NmZWU3IHRvDQphcHBseSB0byBORlN2NC4wIHRvbz8gSWYgeW91J3JlIGdvaW5nIHRv
IHZpb2xhdGUgdGhlIHY0LjAgc3BlYywgdGhlbg0KeW91IG1pZ2h0IGFzIHdlbGwgZG8gaXQgaW4g
YSB3YXkgdGhhdCBpcyBzYW5jdGlvbmVkIGJ5IHY0LjEsIHNvIHRoYXQNCnBlb3BsZSBjYW4gdXNl
IHRoZSBzYW1lIHdpcmVzaGFyayBmaWx0ZXJzIHdoZW4gZGVidWdnaW5nLg0KDQotLSANClRyb25k
IE15a2xlYnVzdA0KTGludXggTkZTIGNsaWVudCBtYWludGFpbmVyLCBQcmltYXJ5RGF0YQ0KdHJv
bmQubXlrbGVidXN0QHByaW1hcnlkYXRhLmNvbQ0K

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b3a82abab07..1452177c822d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -542,6 +542,13 @@  static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
 	return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
 }
 
+static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
+		const nfs4_stateid *stateid)
+{
+	return test_bit(NFS_OPEN_STATE, &state->flags) &&
+		nfs4_stateid_match_other(&state->open_stateid, stateid);
+}
+
 #else
 
 #define nfs4_close_state(a, b) do { } while (0)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f550ac69ffa0..b7b0080977c0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1458,7 +1458,6 @@  static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
 }
 
 static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
-		nfs4_stateid *arg_stateid,
 		nfs4_stateid *stateid, fmode_t fmode)
 {
 	clear_bit(NFS_O_RDWR_STATE, &state->flags);
@@ -1476,10 +1475,9 @@  static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
 	}
 	if (stateid == NULL)
 		return;
-	/* Handle races with OPEN */
-	if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
-	    (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
-	    !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
+	/* Handle OPEN+OPEN_DOWNGRADE races */
+	if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+	    !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
 		nfs_resync_open_stateid_locked(state);
 		return;
 	}
@@ -1493,7 +1491,9 @@  static void nfs_clear_open_stateid(struct nfs4_state *state,
 	nfs4_stateid *stateid, fmode_t fmode)
 {
 	write_seqlock(&state->seqlock);
-	nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
+	/* Ignore, if the CLOSE argment doesn't match the current stateid */
+	if (nfs4_state_match_open_stateid_other(state, arg_stateid))
+		nfs_clear_open_stateid_locked(state, stateid, fmode);
 	write_sequnlock(&state->seqlock);
 	if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
 		nfs4_schedule_state_manager(state->owner->so_server->nfs_client);