nfsd4: Prevent the reuse of a closed stateid
diff mbox

Message ID 2087b4cab6c695a7df01c1135f1773c9b762f0b0.1508248427.git.bcodding@redhat.com
State New
Headers show

Commit Message

Benjamin Coddington Oct. 17, 2017, 1:55 p.m. UTC
While running generic/089 on NFSv4.1, the following on-the-wire exchange
occurs:

Client                  Server
----------              ----------
OPEN (owner A)  ->
                    <-  OPEN response: state A1
CLOSE (state A1)->
OPEN (owner A)  ->
                    <-  CLOSE response: state A2
                    <-  OPEN response: state A3
LOCK (state A3) ->
                    <-  LOCK response: NFS4_BAD_STATEID

The server should not be returning state A3 in response to the second OPEN
after processing the CLOSE and sending back state A2.  The problem is that
nfsd4_process_open2() is able to find an existing open state just after
nfsd4_close() has incremented the state's sequence number but before
calling unhash_ol_stateid().

Fix this by using the state's sc_type field to identify closed state, and
protect the update of that field with st_mutex.  nfsd4_find_existing_open()
will return with the st_mutex held, so that the state will not transition
between being looked up and then updated in nfsd4_process_open2().

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

Comments

Jeff Layton Oct. 17, 2017, 4:39 p.m. UTC | #1
On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
> While running generic/089 on NFSv4.1, the following on-the-wire exchange
> occurs:
> 
> Client                  Server
> ----------              ----------
> OPEN (owner A)  ->
>                     <-  OPEN response: state A1
> CLOSE (state A1)->
> OPEN (owner A)  ->
>                     <-  CLOSE response: state A2
>                     <-  OPEN response: state A3
> LOCK (state A3) ->
>                     <-  LOCK response: NFS4_BAD_STATEID
> 
> The server should not be returning state A3 in response to the second OPEN
> after processing the CLOSE and sending back state A2.  The problem is that
> nfsd4_process_open2() is able to find an existing open state just after
> nfsd4_close() has incremented the state's sequence number but before
> calling unhash_ol_stateid().
> 
> Fix this by using the state's sc_type field to identify closed state, and
> protect the update of that field with st_mutex.  nfsd4_find_existing_open()
> will return with the st_mutex held, so that the state will not transition
> between being looked up and then updated in nfsd4_process_open2().
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
> 

The problem is real, but I'm not thrilled with the fix. It seems more
lock thrashy...

Could we instead fix this by just unhashing the stateid earlier, before
the nfs4_inc_and_copy_stateid call?

> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81aa63b..17473a092d01 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
>  static struct nfs4_ol_stateid *
>  nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  {
> -	struct nfs4_ol_stateid *local, *ret = NULL;
> +	struct nfs4_ol_stateid *local, *ret;
>  	struct nfs4_openowner *oo = open->op_openowner;
>  
> -	lockdep_assert_held(&fp->fi_lock);
> -
> +retry:
> +	ret = NULL;
> +	spin_lock(&fp->fi_lock);
>  	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>  		/* ignore lock owners */
>  		if (local->st_stateowner->so_is_open_owner == 0)
> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  			break;
>  		}
>  	}
> +	spin_unlock(&fp->fi_lock);
> +
> +	/* Did we race with CLOSE? */
> +	if (ret) {
> +		mutex_lock(&ret->st_mutex);
> +		if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
> +			mutex_unlock(&ret->st_mutex);
> +			nfs4_put_stid(&ret->st_stid);
> +			goto retry;
> +		}
> +	}
> +
>  	return ret;
>  }
>  
> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
>  	mutex_lock(&stp->st_mutex);
>  
>  	spin_lock(&oo->oo_owner.so_client->cl_lock);
> -	spin_lock(&fp->fi_lock);
>  
>  	retstp = nfsd4_find_existing_open(fp, open);
>  	if (retstp)
> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
>  	stp->st_deny_bmap = 0;
>  	stp->st_openstp = NULL;
>  	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
> +	spin_lock(&fp->fi_lock);
>  	list_add(&stp->st_perfile, &fp->fi_stateids);
> +	spin_unlock(&fp->fi_lock);
>  
>  out_unlock:
> -	spin_unlock(&fp->fi_lock);
>  	spin_unlock(&oo->oo_owner.so_client->cl_lock);
>  	if (retstp) {
> -		mutex_lock(&retstp->st_mutex);
>  		/* To keep mutex tracking happy */
>  		mutex_unlock(&stp->st_mutex);
>  		stp = retstp;
> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  		status = nfs4_check_deleg(cl, open, &dp);
>  		if (status)
>  			goto out;
> -		spin_lock(&fp->fi_lock);
>  		stp = nfsd4_find_existing_open(fp, open);
> -		spin_unlock(&fp->fi_lock);
>  	} else {
>  		open->op_file = NULL;
>  		status = nfserr_bad_stateid;
> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  	 */
>  	if (stp) {
>  		/* Stateid was found, this is an OPEN upgrade */
> -		mutex_lock(&stp->st_mutex);
>  		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
>  		if (status) {
>  			mutex_unlock(&stp->st_mutex);
> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
>  	bool unhashed;
>  	LIST_HEAD(reaplist);
>  
> -	s->st_stid.sc_type = NFS4_CLOSED_STID;
>  	spin_lock(&clp->cl_lock);
>  	unhashed = unhash_open_stateid(s, &reaplist);
>  
> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out; 
>  	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> +	stp->st_stid.sc_type = NFS4_CLOSED_STID;
>  	mutex_unlock(&stp->st_mutex);
>  
>  	nfsd4_close_open_stateid(stp);
Benjamin Coddington Oct. 17, 2017, 5:46 p.m. UTC | #2
On 17 Oct 2017, at 12:39, Jeff Layton wrote:

> On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
>> While running generic/089 on NFSv4.1, the following on-the-wire 
>> exchange
>> occurs:
>>
>> Client                  Server
>> ----------              ----------
>> OPEN (owner A)  ->
>>                     <-  OPEN response: state A1
>> CLOSE (state A1)->
>> OPEN (owner A)  ->
>>                     <-  CLOSE response: state A2
>>                     <-  OPEN response: state A3
>> LOCK (state A3) ->
>>                     <-  LOCK response: NFS4_BAD_STATEID
>>
>> The server should not be returning state A3 in response to the second 
>> OPEN
>> after processing the CLOSE and sending back state A2.  The problem is 
>> that
>> nfsd4_process_open2() is able to find an existing open state just 
>> after
>> nfsd4_close() has incremented the state's sequence number but before
>> calling unhash_ol_stateid().
>>
>> Fix this by using the state's sc_type field to identify closed state, 
>> and
>> protect the update of that field with st_mutex.  
>> nfsd4_find_existing_open()
>> will return with the st_mutex held, so that the state will not 
>> transition
>> between being looked up and then updated in nfsd4_process_open2().
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>> ---
>>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>>  1 file changed, 19 insertions(+), 10 deletions(-)
>>
>
> The problem is real, but I'm not thrilled with the fix. It seems more
> lock thrashy...

Really?  I don't think I increased any call counts to spinlocks or mutex
locks.  The locking overhead should be equivalent.

> Could we instead fix this by just unhashing the stateid earlier, 
> before
> the nfs4_inc_and_copy_stateid call?

I think I tried this - and if I remember correctly, the problem is that 
you
can't hold st_mutex while taking the cl_lock (or maybe it was the 
fi_lock?).
I tried several simpler ways, and they resulted in deadlocks.

That was a couple of weeks ago, so I apologize if my memory is fuzzy.  I
should have sent this patch off to the list then.  If you need me to
verify that, I can - but maybe someone more familiar with the locking 
here
can save me that time.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton Oct. 17, 2017, 6:19 p.m. UTC | #3
On Tue, 2017-10-17 at 13:46 -0400, Benjamin Coddington wrote:
> On 17 Oct 2017, at 12:39, Jeff Layton wrote:
> 
> > On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
> > > While running generic/089 on NFSv4.1, the following on-the-wire 
> > > exchange
> > > occurs:
> > > 
> > > Client                  Server
> > > ----------              ----------
> > > OPEN (owner A)  ->
> > >                     <-  OPEN response: state A1
> > > CLOSE (state A1)->
> > > OPEN (owner A)  ->
> > >                     <-  CLOSE response: state A2
> > >                     <-  OPEN response: state A3
> > > LOCK (state A3) ->
> > >                     <-  LOCK response: NFS4_BAD_STATEID
> > > 
> > > The server should not be returning state A3 in response to the second 
> > > OPEN
> > > after processing the CLOSE and sending back state A2.  The problem is 
> > > that
> > > nfsd4_process_open2() is able to find an existing open state just 
> > > after
> > > nfsd4_close() has incremented the state's sequence number but before
> > > calling unhash_ol_stateid().
> > > 
> > > Fix this by using the state's sc_type field to identify closed state, 
> > > and
> > > protect the update of that field with st_mutex.  
> > > nfsd4_find_existing_open()
> > > will return with the st_mutex held, so that the state will not 
> > > transition
> > > between being looked up and then updated in nfsd4_process_open2().
> > > 
> > > Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> > > ---
> > >  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
> > >  1 file changed, 19 insertions(+), 10 deletions(-)
> > > 
> > 
> > The problem is real, but I'm not thrilled with the fix. It seems more
> > lock thrashy...
> 
> Really?  I don't think I increased any call counts to spinlocks or mutex
> locks.  The locking overhead should be equivalent.
> 

init_open_stateid will now end up taking fi_lock twice. Also we now have
to take the st_mutex in nfsd4_find_existing_open, just to check sc_type.
Neither of those are probably unreasonable, it's just messier than I'd
like.

> > Could we instead fix this by just unhashing the stateid earlier, 
> > before
> > the nfs4_inc_and_copy_stateid call?
> 
> I think I tried this - and if I remember correctly, the problem is that 
> you
> can't hold st_mutex while taking the cl_lock (or maybe it was the 
> fi_lock?).
> I tried several simpler ways, and they resulted in deadlocks.
> 
> That was a couple of weeks ago, so I apologize if my memory is fuzzy.  I
> should have sent this patch off to the list then.  If you need me to
> verify that, I can - but maybe someone more familiar with the locking 
> here
> can save me that time.
> 
> Ben


There should be no problem taking the cl_lock while holding st_mutex.
It's never safe to do the reverse though. I'd just do the unhash before 
nfs4_inc_and_copy_stateid, and then save the rest of the stuff in
nfsd4_close_open_stateid for after the st_mutex has been dropped.

I think what I'm suggesting is possible. You may need to unroll
nfsd4_close_open_stateid to make it work though.
Jeff Layton Oct. 17, 2017, 7:11 p.m. UTC | #4
On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
> While running generic/089 on NFSv4.1, the following on-the-wire exchange
> occurs:
> 
> Client                  Server
> ----------              ----------
> OPEN (owner A)  ->
>                     <-  OPEN response: state A1
> CLOSE (state A1)->
> OPEN (owner A)  ->
>                     <-  CLOSE response: state A2
>                     <-  OPEN response: state A3
> LOCK (state A3) ->
>                     <-  LOCK response: NFS4_BAD_STATEID
> 
> The server should not be returning state A3 in response to the second OPEN
> after processing the CLOSE and sending back state A2.  The problem is that
> nfsd4_process_open2() is able to find an existing open state just after
> nfsd4_close() has incremented the state's sequence number but before
> calling unhash_ol_stateid().
> 
> Fix this by using the state's sc_type field to identify closed state, and
> protect the update of that field with st_mutex.  nfsd4_find_existing_open()
> will return with the st_mutex held, so that the state will not transition
> between being looked up and then updated in nfsd4_process_open2().
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81aa63b..17473a092d01 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
>  static struct nfs4_ol_stateid *
>  nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  {
> -	struct nfs4_ol_stateid *local, *ret = NULL;
> +	struct nfs4_ol_stateid *local, *ret;
>  	struct nfs4_openowner *oo = open->op_openowner;
>  
> -	lockdep_assert_held(&fp->fi_lock);
> -
> +retry:
> +	ret = NULL;
> +	spin_lock(&fp->fi_lock);
>  	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>  		/* ignore lock owners */
>  		if (local->st_stateowner->so_is_open_owner == 0)
> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  			break;
>  		}
>  	}
> +	spin_unlock(&fp->fi_lock);
> +
> +	/* Did we race with CLOSE? */
> +	if (ret) {
> +		mutex_lock(&ret->st_mutex);
> +		if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
> +			mutex_unlock(&ret->st_mutex);
> +			nfs4_put_stid(&ret->st_stid);
> +			goto retry;
> +		}
> +	}
> +
>  	return ret;
>  }
>  
> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
>  	mutex_lock(&stp->st_mutex);
>  
>  	spin_lock(&oo->oo_owner.so_client->cl_lock);
> -	spin_lock(&fp->fi_lock);
>  
>  	retstp = nfsd4_find_existing_open(fp, open);
>  	if (retstp)
> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
>  	stp->st_deny_bmap = 0;
>  	stp->st_openstp = NULL;
>  	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
> +	spin_lock(&fp->fi_lock);
>  	list_add(&stp->st_perfile, &fp->fi_stateids);
> +	spin_unlock(&fp->fi_lock);
>  

Another potential problem above. I don't think it'd be possible with
v4.0, but I think it could happen with v4.1+:

Could we end up with racing opens, such that two different nfsds search
for an existing open and not find one? Then, they both end up here and
insert two different stateids for the same openowner+file.

That's prevented now because we do it all under the same fi_lock, but
that won't be the case here.

>  out_unlock:
> -	spin_unlock(&fp->fi_lock);
>  	spin_unlock(&oo->oo_owner.so_client->cl_lock);
>  	if (retstp) {
> -		mutex_lock(&retstp->st_mutex);
>  		/* To keep mutex tracking happy */
>  		mutex_unlock(&stp->st_mutex);
>  		stp = retstp;
> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  		status = nfs4_check_deleg(cl, open, &dp);
>  		if (status)
>  			goto out;
> -		spin_lock(&fp->fi_lock);
>  		stp = nfsd4_find_existing_open(fp, open);
> -		spin_unlock(&fp->fi_lock);
>  	} else {
>  		open->op_file = NULL;
>  		status = nfserr_bad_stateid;
> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  	 */
>  	if (stp) {
>  		/* Stateid was found, this is an OPEN upgrade */
> -		mutex_lock(&stp->st_mutex);
>  		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
>  		if (status) {
>  			mutex_unlock(&stp->st_mutex);
> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
>  	bool unhashed;
>  	LIST_HEAD(reaplist);
>  
> -	s->st_stid.sc_type = NFS4_CLOSED_STID;
>  	spin_lock(&clp->cl_lock);
>  	unhashed = unhash_open_stateid(s, &reaplist);
>  
> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out; 
>  	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> +	stp->st_stid.sc_type = NFS4_CLOSED_STID;
>  	mutex_unlock(&stp->st_mutex);
>  
>  	nfsd4_close_open_stateid(stp);
Benjamin Coddington Oct. 17, 2017, 8:40 p.m. UTC | #5
On 17 Oct 2017, at 14:19, Jeff Layton wrote:

> On Tue, 2017-10-17 at 13:46 -0400, Benjamin Coddington wrote:
>> On 17 Oct 2017, at 12:39, Jeff Layton wrote:
>>
>>> On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
>>>> While running generic/089 on NFSv4.1, the following on-the-wire
>>>> exchange
>>>> occurs:
>>>>
>>>> Client                  Server
>>>> ----------              ----------
>>>> OPEN (owner A)  ->
>>>>                     <-  OPEN response: state A1
>>>> CLOSE (state A1)->
>>>> OPEN (owner A)  ->
>>>>                     <-  CLOSE response: state A2
>>>>                     <-  OPEN response: state A3
>>>> LOCK (state A3) ->
>>>>                     <-  LOCK response: NFS4_BAD_STATEID
>>>>
>>>> The server should not be returning state A3 in response to the second
>>>> OPEN
>>>> after processing the CLOSE and sending back state A2.  The problem is
>>>> that
>>>> nfsd4_process_open2() is able to find an existing open state just
>>>> after
>>>> nfsd4_close() has incremented the state's sequence number but before
>>>> calling unhash_ol_stateid().
>>>>
>>>> Fix this by using the state's sc_type field to identify closed state,
>>>> and
>>>> protect the update of that field with st_mutex.
>>>> nfsd4_find_existing_open()
>>>> will return with the st_mutex held, so that the state will not
>>>> transition
>>>> between being looked up and then updated in nfsd4_process_open2().
>>>>
>>>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>>>> ---
>>>>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>>>>  1 file changed, 19 insertions(+), 10 deletions(-)
>>>>
>>>
>>> The problem is real, but I'm not thrilled with the fix. It seems more
>>> lock thrashy...
>>
>> Really?  I don't think I increased any call counts to spinlocks or mutex
>> locks.  The locking overhead should be equivalent.
>>
>
> init_open_stateid will now end up taking fi_lock twice.

Yes, that's true unless there's an existing state.

> Also we now have to take the st_mutex in nfsd4_find_existing_open, just to
> check sc_type.  Neither of those are probably unreasonable, it's just
> messier than I'd like.

It is indeed messy.. no argument.  I'll spin up your suggestion to unhash
the stateid before updating and take it for a ride and let you know the
results.  Thanks for looking at this.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Coddington Oct. 17, 2017, 8:44 p.m. UTC | #6
On 17 Oct 2017, at 15:11, Jeff Layton wrote:

> On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
>> While running generic/089 on NFSv4.1, the following on-the-wire 
>> exchange
>> occurs:
>>
>> Client                  Server
>> ----------              ----------
>> OPEN (owner A)  ->
>>                     <-  OPEN response: state A1
>> CLOSE (state A1)->
>> OPEN (owner A)  ->
>>                     <-  CLOSE response: state A2
>>                     <-  OPEN response: state A3
>> LOCK (state A3) ->
>>                     <-  LOCK response: NFS4_BAD_STATEID
>>
>> The server should not be returning state A3 in response to the second 
>> OPEN
>> after processing the CLOSE and sending back state A2.  The problem is 
>> that
>> nfsd4_process_open2() is able to find an existing open state just 
>> after
>> nfsd4_close() has incremented the state's sequence number but before
>> calling unhash_ol_stateid().
>>
>> Fix this by using the state's sc_type field to identify closed state, 
>> and
>> protect the update of that field with st_mutex.  
>> nfsd4_find_existing_open()
>> will return with the st_mutex held, so that the state will not 
>> transition
>> between being looked up and then updated in nfsd4_process_open2().
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>> ---
>>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>>  1 file changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0c04f81aa63b..17473a092d01 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -3503,11 +3503,12 @@ static const struct 
>> nfs4_stateowner_operations openowner_ops = {
>>  static struct nfs4_ol_stateid *
>>  nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open 
>> *open)
>>  {
>> -	struct nfs4_ol_stateid *local, *ret = NULL;
>> +	struct nfs4_ol_stateid *local, *ret;
>>  	struct nfs4_openowner *oo = open->op_openowner;
>>
>> -	lockdep_assert_held(&fp->fi_lock);
>> -
>> +retry:
>> +	ret = NULL;
>> +	spin_lock(&fp->fi_lock);
>>  	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>>  		/* ignore lock owners */
>>  		if (local->st_stateowner->so_is_open_owner == 0)
>> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, 
>> struct nfsd4_open *open)
>>  			break;
>>  		}
>>  	}
>> +	spin_unlock(&fp->fi_lock);
>> +
>> +	/* Did we race with CLOSE? */
>> +	if (ret) {
>> +		mutex_lock(&ret->st_mutex);
>> +		if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
>> +			mutex_unlock(&ret->st_mutex);
>> +			nfs4_put_stid(&ret->st_stid);
>> +			goto retry;
>> +		}
>> +	}
>> +
>>  	return ret;
>>  }
>>
>> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct 
>> nfsd4_open *open)
>>  	mutex_lock(&stp->st_mutex);
>>
>>  	spin_lock(&oo->oo_owner.so_client->cl_lock);
>> -	spin_lock(&fp->fi_lock);
>>
>>  	retstp = nfsd4_find_existing_open(fp, open);
>>  	if (retstp)
>> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, 
>> struct nfsd4_open *open)
>>  	stp->st_deny_bmap = 0;
>>  	stp->st_openstp = NULL;
>>  	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
>> +	spin_lock(&fp->fi_lock);
>>  	list_add(&stp->st_perfile, &fp->fi_stateids);
>> +	spin_unlock(&fp->fi_lock);
>>
>
> Another potential problem above. I don't think it'd be possible with
> v4.0, but I think it could happen with v4.1+:
>
> Could we end up with racing opens, such that two different nfsds 
> search
> for an existing open and not find one? Then, they both end up here and
> insert two different stateids for the same openowner+file.
>
> That's prevented now because we do it all under the same fi_lock, but
> that won't be the case here.

Yes, that's definitely a problem, and it's reminded me why I kept 
dropping
fi_lock - you can't take the st_mutex while holding it..  This is a 
tangly
bit of locking in here..  I'll see what I can come up with.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Nov. 10, 2017, 10:03 p.m. UTC | #7
I'm assuming this has been addressed by Trond's series (in linux-next
now); but I haven't checked carefully....

--b.

On Tue, Oct 17, 2017 at 09:55:05AM -0400, Benjamin Coddington wrote:
> While running generic/089 on NFSv4.1, the following on-the-wire exchange
> occurs:
> 
> Client                  Server
> ----------              ----------
> OPEN (owner A)  ->
>                     <-  OPEN response: state A1
> CLOSE (state A1)->
> OPEN (owner A)  ->
>                     <-  CLOSE response: state A2
>                     <-  OPEN response: state A3
> LOCK (state A3) ->
>                     <-  LOCK response: NFS4_BAD_STATEID
> 
> The server should not be returning state A3 in response to the second OPEN
> after processing the CLOSE and sending back state A2.  The problem is that
> nfsd4_process_open2() is able to find an existing open state just after
> nfsd4_close() has incremented the state's sequence number but before
> calling unhash_ol_stateid().
> 
> Fix this by using the state's sc_type field to identify closed state, and
> protect the update of that field with st_mutex.  nfsd4_find_existing_open()
> will return with the st_mutex held, so that the state will not transition
> between being looked up and then updated in nfsd4_process_open2().
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81aa63b..17473a092d01 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
>  static struct nfs4_ol_stateid *
>  nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  {
> -	struct nfs4_ol_stateid *local, *ret = NULL;
> +	struct nfs4_ol_stateid *local, *ret;
>  	struct nfs4_openowner *oo = open->op_openowner;
>  
> -	lockdep_assert_held(&fp->fi_lock);
> -
> +retry:
> +	ret = NULL;
> +	spin_lock(&fp->fi_lock);
>  	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>  		/* ignore lock owners */
>  		if (local->st_stateowner->so_is_open_owner == 0)
> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
>  			break;
>  		}
>  	}
> +	spin_unlock(&fp->fi_lock);
> +
> +	/* Did we race with CLOSE? */
> +	if (ret) {
> +		mutex_lock(&ret->st_mutex);
> +		if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
> +			mutex_unlock(&ret->st_mutex);
> +			nfs4_put_stid(&ret->st_stid);
> +			goto retry;
> +		}
> +	}
> +
>  	return ret;
>  }
>  
> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
>  	mutex_lock(&stp->st_mutex);
>  
>  	spin_lock(&oo->oo_owner.so_client->cl_lock);
> -	spin_lock(&fp->fi_lock);
>  
>  	retstp = nfsd4_find_existing_open(fp, open);
>  	if (retstp)
> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
>  	stp->st_deny_bmap = 0;
>  	stp->st_openstp = NULL;
>  	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
> +	spin_lock(&fp->fi_lock);
>  	list_add(&stp->st_perfile, &fp->fi_stateids);
> +	spin_unlock(&fp->fi_lock);
>  
>  out_unlock:
> -	spin_unlock(&fp->fi_lock);
>  	spin_unlock(&oo->oo_owner.so_client->cl_lock);
>  	if (retstp) {
> -		mutex_lock(&retstp->st_mutex);
>  		/* To keep mutex tracking happy */
>  		mutex_unlock(&stp->st_mutex);
>  		stp = retstp;
> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  		status = nfs4_check_deleg(cl, open, &dp);
>  		if (status)
>  			goto out;
> -		spin_lock(&fp->fi_lock);
>  		stp = nfsd4_find_existing_open(fp, open);
> -		spin_unlock(&fp->fi_lock);
>  	} else {
>  		open->op_file = NULL;
>  		status = nfserr_bad_stateid;
> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>  	 */
>  	if (stp) {
>  		/* Stateid was found, this is an OPEN upgrade */
> -		mutex_lock(&stp->st_mutex);
>  		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
>  		if (status) {
>  			mutex_unlock(&stp->st_mutex);
> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
>  	bool unhashed;
>  	LIST_HEAD(reaplist);
>  
> -	s->st_stid.sc_type = NFS4_CLOSED_STID;
>  	spin_lock(&clp->cl_lock);
>  	unhashed = unhash_open_stateid(s, &reaplist);
>  
> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out; 
>  	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> +	stp->st_stid.sc_type = NFS4_CLOSED_STID;
>  	mutex_unlock(&stp->st_mutex);
>  
>  	nfsd4_close_open_stateid(stp);
> -- 
> 2.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Coddington Nov. 13, 2017, 1:22 p.m. UTC | #8
Yes, I believe it has, though I haven't tested it yet, unfortunately.

On 10 Nov 2017, at 17:03, J. Bruce Fields wrote:

> I'm assuming this has been addressed by Trond's series (in linux-next
> now); but I haven't checked carefully....
>
> --b.
>
> On Tue, Oct 17, 2017 at 09:55:05AM -0400, Benjamin Coddington wrote:
>> While running generic/089 on NFSv4.1, the following on-the-wire 
>> exchange
>> occurs:
>>
>> Client                  Server
>> ----------              ----------
>> OPEN (owner A)  ->
>>                     <-  OPEN response: state A1
>> CLOSE (state A1)->
>> OPEN (owner A)  ->
>>                     <-  CLOSE response: state A2
>>                     <-  OPEN response: state A3
>> LOCK (state A3) ->
>>                     <-  LOCK response: NFS4_BAD_STATEID
>>
>> The server should not be returning state A3 in response to the second 
>> OPEN
>> after processing the CLOSE and sending back state A2.  The problem is 
>> that
>> nfsd4_process_open2() is able to find an existing open state just 
>> after
>> nfsd4_close() has incremented the state's sequence number but before
>> calling unhash_ol_stateid().
>>
>> Fix this by using the state's sc_type field to identify closed state, 
>> and
>> protect the update of that field with st_mutex.  
>> nfsd4_find_existing_open()
>> will return with the st_mutex held, so that the state will not 
>> transition
>> between being looked up and then updated in nfsd4_process_open2().
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>> ---
>>  fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>>  1 file changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0c04f81aa63b..17473a092d01 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -3503,11 +3503,12 @@ static const struct 
>> nfs4_stateowner_operations openowner_ops = {
>>  static struct nfs4_ol_stateid *
>>  nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open 
>> *open)
>>  {
>> -	struct nfs4_ol_stateid *local, *ret = NULL;
>> +	struct nfs4_ol_stateid *local, *ret;
>>  	struct nfs4_openowner *oo = open->op_openowner;
>>
>> -	lockdep_assert_held(&fp->fi_lock);
>> -
>> +retry:
>> +	ret = NULL;
>> +	spin_lock(&fp->fi_lock);
>>  	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>>  		/* ignore lock owners */
>>  		if (local->st_stateowner->so_is_open_owner == 0)
>> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, 
>> struct nfsd4_open *open)
>>  			break;
>>  		}
>>  	}
>> +	spin_unlock(&fp->fi_lock);
>> +
>> +	/* Did we race with CLOSE? */
>> +	if (ret) {
>> +		mutex_lock(&ret->st_mutex);
>> +		if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
>> +			mutex_unlock(&ret->st_mutex);
>> +			nfs4_put_stid(&ret->st_stid);
>> +			goto retry;
>> +		}
>> +	}
>> +
>>  	return ret;
>>  }
>>
>> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct 
>> nfsd4_open *open)
>>  	mutex_lock(&stp->st_mutex);
>>
>>  	spin_lock(&oo->oo_owner.so_client->cl_lock);
>> -	spin_lock(&fp->fi_lock);
>>
>>  	retstp = nfsd4_find_existing_open(fp, open);
>>  	if (retstp)
>> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, 
>> struct nfsd4_open *open)
>>  	stp->st_deny_bmap = 0;
>>  	stp->st_openstp = NULL;
>>  	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
>> +	spin_lock(&fp->fi_lock);
>>  	list_add(&stp->st_perfile, &fp->fi_stateids);
>> +	spin_unlock(&fp->fi_lock);
>>
>>  out_unlock:
>> -	spin_unlock(&fp->fi_lock);
>>  	spin_unlock(&oo->oo_owner.so_client->cl_lock);
>>  	if (retstp) {
>> -		mutex_lock(&retstp->st_mutex);
>>  		/* To keep mutex tracking happy */
>>  		mutex_unlock(&stp->st_mutex);
>>  		stp = retstp;
>> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, 
>> struct svc_fh *current_fh, struct nf
>>  		status = nfs4_check_deleg(cl, open, &dp);
>>  		if (status)
>>  			goto out;
>> -		spin_lock(&fp->fi_lock);
>>  		stp = nfsd4_find_existing_open(fp, open);
>> -		spin_unlock(&fp->fi_lock);
>>  	} else {
>>  		open->op_file = NULL;
>>  		status = nfserr_bad_stateid;
>> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, 
>> struct svc_fh *current_fh, struct nf
>>  	 */
>>  	if (stp) {
>>  		/* Stateid was found, this is an OPEN upgrade */
>> -		mutex_lock(&stp->st_mutex);
>>  		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
>>  		if (status) {
>>  			mutex_unlock(&stp->st_mutex);
>> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct 
>> nfs4_ol_stateid *s)
>>  	bool unhashed;
>>  	LIST_HEAD(reaplist);
>>
>> -	s->st_stid.sc_type = NFS4_CLOSED_STID;
>>  	spin_lock(&clp->cl_lock);
>>  	unhashed = unhash_open_stateid(s, &reaplist);
>>
>> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct 
>> nfsd4_compound_state *cstate,
>>  	if (status)
>>  		goto out;
>>  	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
>> +	stp->st_stid.sc_type = NFS4_CLOSED_STID;
>>  	mutex_unlock(&stp->st_mutex);
>>
>>  	nfsd4_close_open_stateid(stp);
>> -- 
>> 2.9.3
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" 
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81aa63b..17473a092d01 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3503,11 +3503,12 @@  static const struct nfs4_stateowner_operations openowner_ops = {
 static struct nfs4_ol_stateid *
 nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
 {
-	struct nfs4_ol_stateid *local, *ret = NULL;
+	struct nfs4_ol_stateid *local, *ret;
 	struct nfs4_openowner *oo = open->op_openowner;
 
-	lockdep_assert_held(&fp->fi_lock);
-
+retry:
+	ret = NULL;
+	spin_lock(&fp->fi_lock);
 	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
 		/* ignore lock owners */
 		if (local->st_stateowner->so_is_open_owner == 0)
@@ -3518,6 +3519,18 @@  nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
 			break;
 		}
 	}
+	spin_unlock(&fp->fi_lock);
+
+	/* Did we race with CLOSE? */
+	if (ret) {
+		mutex_lock(&ret->st_mutex);
+		if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
+			mutex_unlock(&ret->st_mutex);
+			nfs4_put_stid(&ret->st_stid);
+			goto retry;
+		}
+	}
+
 	return ret;
 }
 
@@ -3566,7 +3579,6 @@  init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
 	mutex_lock(&stp->st_mutex);
 
 	spin_lock(&oo->oo_owner.so_client->cl_lock);
-	spin_lock(&fp->fi_lock);
 
 	retstp = nfsd4_find_existing_open(fp, open);
 	if (retstp)
@@ -3583,13 +3595,13 @@  init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
 	stp->st_deny_bmap = 0;
 	stp->st_openstp = NULL;
 	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
+	spin_lock(&fp->fi_lock);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
+	spin_unlock(&fp->fi_lock);
 
 out_unlock:
-	spin_unlock(&fp->fi_lock);
 	spin_unlock(&oo->oo_owner.so_client->cl_lock);
 	if (retstp) {
-		mutex_lock(&retstp->st_mutex);
 		/* To keep mutex tracking happy */
 		mutex_unlock(&stp->st_mutex);
 		stp = retstp;
@@ -4403,9 +4415,7 @@  nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 		status = nfs4_check_deleg(cl, open, &dp);
 		if (status)
 			goto out;
-		spin_lock(&fp->fi_lock);
 		stp = nfsd4_find_existing_open(fp, open);
-		spin_unlock(&fp->fi_lock);
 	} else {
 		open->op_file = NULL;
 		status = nfserr_bad_stateid;
@@ -4419,7 +4429,6 @@  nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	 */
 	if (stp) {
 		/* Stateid was found, this is an OPEN upgrade */
-		mutex_lock(&stp->st_mutex);
 		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
 		if (status) {
 			mutex_unlock(&stp->st_mutex);
@@ -5294,7 +5303,6 @@  static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
 	bool unhashed;
 	LIST_HEAD(reaplist);
 
-	s->st_stid.sc_type = NFS4_CLOSED_STID;
 	spin_lock(&clp->cl_lock);
 	unhashed = unhash_open_stateid(s, &reaplist);
 
@@ -5335,6 +5343,7 @@  nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+	stp->st_stid.sc_type = NFS4_CLOSED_STID;
 	mutex_unlock(&stp->st_mutex);
 
 	nfsd4_close_open_stateid(stp);