diff mbox

[v3,2/5] nfsd: have nfsd4_lock use blocking locks for v4.1+ locks

Message ID 1474057707-31286-3-git-send-email-jlayton@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jeff Layton Sept. 16, 2016, 8:28 p.m. UTC
Create a new per-lockowner+per-inode structure that contains a
file_lock. Have nfsd4_lock add this structure to the lockowner's list
prior to setting the lock. Then call the vfs and request a blocking lock
(by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
back, then we dequeue the block structure and free it. When the next
lock request comes in, we'll look for an existing block for the same
filehandle and dequeue and reuse it if there is one.

When the lock becomes free (signaled by an lm_notify call), we dequeue the
block from the lockowner's list and kick off a CB_NOTIFY_LOCK callback to
inform the client that it should retry the lock request.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfsd/nfs4state.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++------
 fs/nfsd/state.h     |  12 +++-
 2 files changed, 156 insertions(+), 20 deletions(-)

Comments

J. Bruce Fields Sept. 23, 2016, 9:19 p.m. UTC | #1
On Fri, Sep 16, 2016 at 04:28:24PM -0400, Jeff Layton wrote:
> Create a new per-lockowner+per-inode structure that contains a
> file_lock. Have nfsd4_lock add this structure to the lockowner's list
> prior to setting the lock. Then call the vfs and request a blocking lock
> (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED

That doesn't sound right.  FILE_LOCK_DEFERRED is a special case for
asynchronous locking--it means "I don't know whether there's a conflict
or not, it may take a while to check", not "there's a conflict".

(Ugh.  I apologize for the asynchronous locking code.)

--b.

> back, then we dequeue the block structure and free it. When the next
> lock request comes in, we'll look for an existing block for the same
> filehandle and dequeue and reuse it if there is one.
> 
> When the lock comes free (a'la an lm_notify call), we dequeue it
> from the lockowner's list and kick off a CB_NOTIFY_LOCK callback to
> inform the client that it should retry the lock request.
> 
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/nfsd/nfs4state.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++------
>  fs/nfsd/state.h     |  12 +++-
>  2 files changed, 156 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index a204d7e109d4..ca0db4974e5b 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
>  static void free_session(struct nfsd4_session *);
>  
>  static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>  
>  static bool is_session_dead(struct nfsd4_session *ses)
>  {
> @@ -210,6 +211,84 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
>  	spin_unlock(&nn->client_lock);
>  }
>  
> +static struct nfsd4_blocked_lock *
> +find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
> +			struct nfsd_net *nn)
> +{
> +	struct nfsd4_blocked_lock *cur, *found = NULL;
> +
> +	spin_lock(&nn->client_lock);
> +	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
> +		if (fh_match(fh, &cur->nbl_fh)) {
> +			list_del_init(&cur->nbl_list);
> +			found = cur;
> +			break;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	if (found)
> +		posix_unblock_lock(&found->nbl_lock);
> +	return found;
> +}
> +
> +static struct nfsd4_blocked_lock *
> +find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
> +			struct nfsd_net *nn)
> +{
> +	struct nfsd4_blocked_lock *nbl;
> +
> +	nbl = find_blocked_lock(lo, fh, nn);
> +	if (!nbl) {
> +		nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
> +		if (nbl) {
> +			fh_copy_shallow(&nbl->nbl_fh, fh);
> +			locks_init_lock(&nbl->nbl_lock);
> +			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
> +					&nfsd4_cb_notify_lock_ops,
> +					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
> +		}
> +	}
> +	return nbl;
> +}
> +
> +static void
> +free_blocked_lock(struct nfsd4_blocked_lock *nbl)
> +{
> +	locks_release_private(&nbl->nbl_lock);
> +	kfree(nbl);
> +}
> +
> +static int
> +nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
> +{
> +	/*
> +	 * Since this is just an optimization, we don't try very hard if it
> +	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
> +	 * just quit trying on anything else.
> +	 */
> +	switch (task->tk_status) {
> +	case -NFS4ERR_DELAY:
> +		rpc_delay(task, 1 * HZ);
> +		return 0;
> +	default:
> +		return 1;
> +	}
> +}
> +
> +static void
> +nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
> +{
> +	struct nfsd4_blocked_lock	*nbl = container_of(cb,
> +						struct nfsd4_blocked_lock, nbl_cb);
> +
> +	free_blocked_lock(nbl);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
> +	.done		= nfsd4_cb_notify_lock_done,
> +	.release	= nfsd4_cb_notify_lock_release,
> +};
> +
>  static inline struct nfs4_stateowner *
>  nfs4_get_stateowner(struct nfs4_stateowner *sop)
>  {
> @@ -5309,7 +5388,29 @@ nfsd4_fl_put_owner(fl_owner_t owner)
>  		nfs4_put_stateowner(&lo->lo_owner);
>  }
>  
> +static void
> +nfsd4_lm_notify(struct file_lock *fl)
> +{
> +	struct nfs4_lockowner		*lo = (struct nfs4_lockowner *)fl->fl_owner;
> +	struct net			*net = lo->lo_owner.so_client->net;
> +	struct nfsd_net			*nn = net_generic(net, nfsd_net_id);
> +	struct nfsd4_blocked_lock	*nbl = container_of(fl,
> +						struct nfsd4_blocked_lock, nbl_lock);
> +	bool queue = false;
> +
> +	spin_lock(&nn->client_lock);
> +	if (!list_empty(&nbl->nbl_list)) {
> +		list_del_init(&nbl->nbl_list);
> +		queue = true;
> +	}
> +	spin_unlock(&nn->client_lock);
> +
> +	if (queue)
> +		nfsd4_run_cb(&nbl->nbl_cb);
> +}
> +
>  static const struct lock_manager_operations nfsd_posix_mng_ops  = {
> +	.lm_notify = nfsd4_lm_notify,
>  	.lm_get_owner = nfsd4_fl_get_owner,
>  	.lm_put_owner = nfsd4_fl_put_owner,
>  };
> @@ -5407,6 +5508,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
>  	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
>  	if (!lo)
>  		return NULL;
> +	INIT_LIST_HEAD(&lo->lo_blocked);
>  	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
>  	lo->lo_owner.so_is_open_owner = 0;
>  	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
> @@ -5588,12 +5690,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	struct nfs4_ol_stateid *open_stp = NULL;
>  	struct nfs4_file *fp;
>  	struct file *filp = NULL;
> +	struct nfsd4_blocked_lock *nbl = NULL;
>  	struct file_lock *file_lock = NULL;
>  	struct file_lock *conflock = NULL;
>  	__be32 status = 0;
>  	int lkflg;
>  	int err;
>  	bool new = false;
> +	unsigned char fl_type;
> +	unsigned int fl_flags = FL_POSIX;
>  	struct net *net = SVC_NET(rqstp);
>  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  
> @@ -5658,46 +5763,55 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (!locks_in_grace(net) && lock->lk_reclaim)
>  		goto out;
>  
> -	file_lock = locks_alloc_lock();
> -	if (!file_lock) {
> -		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
> -		status = nfserr_jukebox;
> -		goto out;
> -	}
> -
>  	fp = lock_stp->st_stid.sc_file;
>  	switch (lock->lk_type) {
> -		case NFS4_READ_LT:
>  		case NFS4_READW_LT:
> +			if (nfsd4_has_session(cstate))
> +				fl_flags |= FL_SLEEP;
> +			/* Fallthrough */
> +		case NFS4_READ_LT:
>  			spin_lock(&fp->fi_lock);
>  			filp = find_readable_file_locked(fp);
>  			if (filp)
>  				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
>  			spin_unlock(&fp->fi_lock);
> -			file_lock->fl_type = F_RDLCK;
> +			fl_type = F_RDLCK;
>  			break;
> -		case NFS4_WRITE_LT:
>  		case NFS4_WRITEW_LT:
> +			if (nfsd4_has_session(cstate))
> +				fl_flags |= FL_SLEEP;
> +			/* Fallthrough */
> +		case NFS4_WRITE_LT:
>  			spin_lock(&fp->fi_lock);
>  			filp = find_writeable_file_locked(fp);
>  			if (filp)
>  				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
>  			spin_unlock(&fp->fi_lock);
> -			file_lock->fl_type = F_WRLCK;
> +			fl_type = F_WRLCK;
>  			break;
>  		default:
>  			status = nfserr_inval;
>  		goto out;
>  	}
> +
>  	if (!filp) {
>  		status = nfserr_openmode;
>  		goto out;
>  	}
>  
> +	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
> +	if (!nbl) {
> +		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
> +		status = nfserr_jukebox;
> +		goto out;
> +	}
> +
> +	file_lock = &nbl->nbl_lock;
> +	file_lock->fl_type = fl_type;
>  	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
>  	file_lock->fl_pid = current->tgid;
>  	file_lock->fl_file = filp;
> -	file_lock->fl_flags = FL_POSIX;
> +	file_lock->fl_flags = fl_flags;
>  	file_lock->fl_lmops = &nfsd_posix_mng_ops;
>  	file_lock->fl_start = lock->lk_offset;
>  	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
> @@ -5710,18 +5824,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		goto out;
>  	}
>  
> +	if (fl_flags & FL_SLEEP) {
> +		spin_lock(&nn->client_lock);
> +		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> +		spin_unlock(&nn->client_lock);
> +	}
> +
>  	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
> -	switch (-err) {
> +	switch (err) {
>  	case 0: /* success! */
>  		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
>  		status = 0;
>  		break;
> -	case (EAGAIN):		/* conflock holds conflicting lock */
> +	case FILE_LOCK_DEFERRED:
> +		nbl = NULL;
> +		/* Fallthrough */
> +	case -EAGAIN:		/* conflock holds conflicting lock */
>  		status = nfserr_denied;
>  		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
>  		nfs4_set_lock_denied(conflock, &lock->lk_denied);
>  		break;
> -	case (EDEADLK):
> +	case -EDEADLK:
>  		status = nfserr_deadlock;
>  		break;
>  	default:
> @@ -5730,6 +5853,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		break;
>  	}
>  out:
> +	if (nbl) {
> +		/* dequeue it if we queued it before */
> +		if (fl_flags & FL_SLEEP) {
> +			spin_lock(&nn->client_lock);
> +			list_del_init(&nbl->nbl_list);
> +			spin_unlock(&nn->client_lock);
> +		}
> +		free_blocked_lock(nbl);
> +	}
>  	if (filp)
>  		fput(filp);
>  	if (lock_stp) {
> @@ -5753,8 +5885,6 @@ out:
>  	if (open_stp)
>  		nfs4_put_stid(&open_stp->st_stid);
>  	nfsd4_bump_seqid(cstate, status);
> -	if (file_lock)
> -		locks_free_lock(file_lock);
>  	if (conflock)
>  		locks_free_lock(conflock);
>  	return status;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 88d029dd13aa..e45c183a8bf7 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -440,11 +440,11 @@ struct nfs4_openowner {
>  /*
>   * Represents a generic "lockowner". Similar to an openowner. References to it
>   * are held by the lock stateids that are created on its behalf. This object is
> - * a superset of the nfs4_stateowner struct (or would be if it needed any extra
> - * fields).
> + * a superset of the nfs4_stateowner struct.
>   */
>  struct nfs4_lockowner {
> -	struct nfs4_stateowner	lo_owner; /* must be first element */
> +	struct nfs4_stateowner	lo_owner;	/* must be first element */
> +	struct list_head	lo_blocked;	/* blocked file_locks */
>  };
>  
>  static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
> @@ -580,7 +580,13 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>  	return (s32)(a->si_generation - b->si_generation) > 0;
>  }
>  
> +/*
> + * When a client tries to get a lock on a file, we set one of these objects
> + * on the blocking lock. When the lock becomes free, we can then issue a
> + * CB_NOTIFY_LOCK to the client.
> + */
>  struct nfsd4_blocked_lock {
> +	struct list_head	nbl_list;
>  	struct file_lock	nbl_lock;
>  	struct knfsd_fh		nbl_fh;
>  	struct nfsd4_callback	nbl_cb;
> -- 
> 2.7.4
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton Sept. 24, 2016, 12:43 a.m. UTC | #2
On Fri, 2016-09-23 at 17:19 -0400, J. Bruce Fields wrote:
> On Fri, Sep 16, 2016 at 04:28:24PM -0400, Jeff Layton wrote:
> > 
> > Create a new per-lockowner+per-inode structure that contains a
> > file_lock. Have nfsd4_lock add this structure to the lockowner's list
> > prior to setting the lock. Then call the vfs and request a blocking lock
> > (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
> 
> That doesn't sound right.  FILE_LOCK_DEFERRED is a special case for
> asynchronous locking--it means "I don't know whether there's a conflict
> or not, it may take a while to check", not "there's a conflict".
> 
> (Ugh.  I apologize for the asynchronous locking code.)
> 
> --b.
> 

The local file locking code definitely uses this return code to mean
"This lock is blocked, and I'll call your lm_notify when it's
unblocked", which is exactly what we rely on here.

There is a place that uses it in the way you mention though, and that is
when DLM and svc lockd are interacting via dlm_posix_lock(). lockd can't
be in play here since this is all NFSv4, so I think the code does handle
this correctly.

That said... maybe we should think about some way to disambiguate
those two states in the return code. It is horribly confusing.
 
> > back, then we dequeue the block structure and free it. When the next
> > lock request comes in, we'll look for an existing block for the same
> > filehandle and dequeue and reuse it if there is one.
> > 
> > When the lock comes free (a'la an lm_notify call), we dequeue it
> > from the lockowner's list and kick off a CB_NOTIFY_LOCK callback to
> > inform the client that it should retry the lock request.
> > 
> > Signed-off-by: Jeff Layton <jlayton@redhat.com>
> > ---
> >  fs/nfsd/nfs4state.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++------
> >  fs/nfsd/state.h     |  12 +++-
> >  2 files changed, 156 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index a204d7e109d4..ca0db4974e5b 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
> >  static void free_session(struct nfsd4_session *);
> >  
> >  static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> > +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> >  
> >  static bool is_session_dead(struct nfsd4_session *ses)
> >  {
> > @@ -210,6 +211,84 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
> >  	spin_unlock(&nn->client_lock);
> >  }
> >  
> > +static struct nfsd4_blocked_lock *
> > +find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
> > +			struct nfsd_net *nn)
> > +{
> > +	struct nfsd4_blocked_lock *cur, *found = NULL;
> > +
> > +	spin_lock(&nn->client_lock);
> > +	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
> > +		if (fh_match(fh, &cur->nbl_fh)) {
> > +			list_del_init(&cur->nbl_list);
> > +			found = cur;
> > +			break;
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	if (found)
> > +		posix_unblock_lock(&found->nbl_lock);
> > +	return found;
> > +}
> > +
> > +static struct nfsd4_blocked_lock *
> > +find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
> > +			struct nfsd_net *nn)
> > +{
> > +	struct nfsd4_blocked_lock *nbl;
> > +
> > +	nbl = find_blocked_lock(lo, fh, nn);
> > +	if (!nbl) {
> > +		nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
> > +		if (nbl) {
> > +			fh_copy_shallow(&nbl->nbl_fh, fh);
> > +			locks_init_lock(&nbl->nbl_lock);
> > +			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
> > +					&nfsd4_cb_notify_lock_ops,
> > +					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
> > +		}
> > +	}
> > +	return nbl;
> > +}
> > +
> > +static void
> > +free_blocked_lock(struct nfsd4_blocked_lock *nbl)
> > +{
> > +	locks_release_private(&nbl->nbl_lock);
> > +	kfree(nbl);
> > +}
> > +
> > +static int
> > +nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > +{
> > +	/*
> > +	 * Since this is just an optimization, we don't try very hard if it
> > +	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
> > +	 * just quit trying on anything else.
> > +	 */
> > +	switch (task->tk_status) {
> > +	case -NFS4ERR_DELAY:
> > +		rpc_delay(task, 1 * HZ);
> > +		return 0;
> > +	default:
> > +		return 1;
> > +	}
> > +}
> > +
> > +static void
> > +nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
> > +{
> > +	struct nfsd4_blocked_lock	*nbl = container_of(cb,
> > +						struct nfsd4_blocked_lock, nbl_cb);
> > +
> > +	free_blocked_lock(nbl);
> > +}
> > +
> > +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
> > +	.done		= nfsd4_cb_notify_lock_done,
> > +	.release	= nfsd4_cb_notify_lock_release,
> > +};
> > +
> >  static inline struct nfs4_stateowner *
> >  nfs4_get_stateowner(struct nfs4_stateowner *sop)
> >  {
> > @@ -5309,7 +5388,29 @@ nfsd4_fl_put_owner(fl_owner_t owner)
> >  		nfs4_put_stateowner(&lo->lo_owner);
> >  }
> >  
> > +static void
> > +nfsd4_lm_notify(struct file_lock *fl)
> > +{
> > +	struct nfs4_lockowner		*lo = (struct nfs4_lockowner *)fl->fl_owner;
> > +	struct net			*net = lo->lo_owner.so_client->net;
> > +	struct nfsd_net			*nn = net_generic(net, nfsd_net_id);
> > +	struct nfsd4_blocked_lock	*nbl = container_of(fl,
> > +						struct nfsd4_blocked_lock, nbl_lock);
> > +	bool queue = false;
> > +
> > +	spin_lock(&nn->client_lock);
> > +	if (!list_empty(&nbl->nbl_list)) {
> > +		list_del_init(&nbl->nbl_list);
> > +		queue = true;
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +
> > +	if (queue)
> > +		nfsd4_run_cb(&nbl->nbl_cb);
> > +}
> > +
> >  static const struct lock_manager_operations nfsd_posix_mng_ops  = {
> > +	.lm_notify = nfsd4_lm_notify,
> >  	.lm_get_owner = nfsd4_fl_get_owner,
> >  	.lm_put_owner = nfsd4_fl_put_owner,
> >  };
> > @@ -5407,6 +5508,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
> >  	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
> >  	if (!lo)
> >  		return NULL;
> > +	INIT_LIST_HEAD(&lo->lo_blocked);
> >  	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
> >  	lo->lo_owner.so_is_open_owner = 0;
> >  	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
> > @@ -5588,12 +5690,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	struct nfs4_ol_stateid *open_stp = NULL;
> >  	struct nfs4_file *fp;
> >  	struct file *filp = NULL;
> > +	struct nfsd4_blocked_lock *nbl = NULL;
> >  	struct file_lock *file_lock = NULL;
> >  	struct file_lock *conflock = NULL;
> >  	__be32 status = 0;
> >  	int lkflg;
> >  	int err;
> >  	bool new = false;
> > +	unsigned char fl_type;
> > +	unsigned int fl_flags = FL_POSIX;
> >  	struct net *net = SVC_NET(rqstp);
> >  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> >  
> > @@ -5658,46 +5763,55 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	if (!locks_in_grace(net) && lock->lk_reclaim)
> >  		goto out;
> >  
> > -	file_lock = locks_alloc_lock();
> > -	if (!file_lock) {
> > -		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
> > -		status = nfserr_jukebox;
> > -		goto out;
> > -	}
> > -
> >  	fp = lock_stp->st_stid.sc_file;
> >  	switch (lock->lk_type) {
> > -		case NFS4_READ_LT:
> >  		case NFS4_READW_LT:
> > +			if (nfsd4_has_session(cstate))
> > +				fl_flags |= FL_SLEEP;
> > +			/* Fallthrough */
> > +		case NFS4_READ_LT:
> >  			spin_lock(&fp->fi_lock);
> >  			filp = find_readable_file_locked(fp);
> >  			if (filp)
> >  				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
> >  			spin_unlock(&fp->fi_lock);
> > -			file_lock->fl_type = F_RDLCK;
> > +			fl_type = F_RDLCK;
> >  			break;
> > -		case NFS4_WRITE_LT:
> >  		case NFS4_WRITEW_LT:
> > +			if (nfsd4_has_session(cstate))
> > +				fl_flags |= FL_SLEEP;
> > +			/* Fallthrough */
> > +		case NFS4_WRITE_LT:
> >  			spin_lock(&fp->fi_lock);
> >  			filp = find_writeable_file_locked(fp);
> >  			if (filp)
> >  				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
> >  			spin_unlock(&fp->fi_lock);
> > -			file_lock->fl_type = F_WRLCK;
> > +			fl_type = F_WRLCK;
> >  			break;
> >  		default:
> >  			status = nfserr_inval;
> >  		goto out;
> >  	}
> > +
> >  	if (!filp) {
> >  		status = nfserr_openmode;
> >  		goto out;
> >  	}
> >  
> > +	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
> > +	if (!nbl) {
> > +		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
> > +		status = nfserr_jukebox;
> > +		goto out;
> > +	}
> > +
> > +	file_lock = &nbl->nbl_lock;
> > +	file_lock->fl_type = fl_type;
> >  	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
> >  	file_lock->fl_pid = current->tgid;
> >  	file_lock->fl_file = filp;
> > -	file_lock->fl_flags = FL_POSIX;
> > +	file_lock->fl_flags = fl_flags;
> >  	file_lock->fl_lmops = &nfsd_posix_mng_ops;
> >  	file_lock->fl_start = lock->lk_offset;
> >  	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
> > @@ -5710,18 +5824,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  		goto out;
> >  	}
> >  
> > +	if (fl_flags & FL_SLEEP) {
> > +		spin_lock(&nn->client_lock);
> > +		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> > +		spin_unlock(&nn->client_lock);
> > +	}
> > +
> >  	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
> > -	switch (-err) {
> > +	switch (err) {
> >  	case 0: /* success! */
> >  		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
> >  		status = 0;
> >  		break;
> > -	case (EAGAIN):		/* conflock holds conflicting lock */
> > +	case FILE_LOCK_DEFERRED:
> > +		nbl = NULL;
> > +		/* Fallthrough */
> > +	case -EAGAIN:		/* conflock holds conflicting lock */
> >  		status = nfserr_denied;
> >  		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
> >  		nfs4_set_lock_denied(conflock, &lock->lk_denied);
> >  		break;
> > -	case (EDEADLK):
> > +	case -EDEADLK:
> >  		status = nfserr_deadlock;
> >  		break;
> >  	default:
> > @@ -5730,6 +5853,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  		break;
> >  	}
> >  out:
> > +	if (nbl) {
> > +		/* dequeue it if we queued it before */
> > +		if (fl_flags & FL_SLEEP) {
> > +			spin_lock(&nn->client_lock);
> > +			list_del_init(&nbl->nbl_list);
> > +			spin_unlock(&nn->client_lock);
> > +		}
> > +		free_blocked_lock(nbl);
> > +	}
> >  	if (filp)
> >  		fput(filp);
> >  	if (lock_stp) {
> > @@ -5753,8 +5885,6 @@ out:
> >  	if (open_stp)
> >  		nfs4_put_stid(&open_stp->st_stid);
> >  	nfsd4_bump_seqid(cstate, status);
> > -	if (file_lock)
> > -		locks_free_lock(file_lock);
> >  	if (conflock)
> >  		locks_free_lock(conflock);
> >  	return status;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 88d029dd13aa..e45c183a8bf7 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -440,11 +440,11 @@ struct nfs4_openowner {
> >  /*
> >   * Represents a generic "lockowner". Similar to an openowner. References to it
> >   * are held by the lock stateids that are created on its behalf. This object is
> > - * a superset of the nfs4_stateowner struct (or would be if it needed any extra
> > - * fields).
> > + * a superset of the nfs4_stateowner struct.
> >   */
> >  struct nfs4_lockowner {
> > -	struct nfs4_stateowner	lo_owner; /* must be first element */
> > +	struct nfs4_stateowner	lo_owner;	/* must be first element */
> > +	struct list_head	lo_blocked;	/* blocked file_locks */
> >  };
> >  
> >  static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
> > @@ -580,7 +580,13 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> >  	return (s32)(a->si_generation - b->si_generation) > 0;
> >  }
> >  
> > +/*
> > + * When a client tries to get a lock on a file, we set one of these objects
> > + * on the blocking lock. When the lock becomes free, we can then issue a
> > + * CB_NOTIFY_LOCK to the client.
> > + */
> >  struct nfsd4_blocked_lock {
> > +	struct list_head	nbl_list;
> >  	struct file_lock	nbl_lock;
> >  	struct knfsd_fh		nbl_fh;
> >  	struct nfsd4_callback	nbl_cb;
> > -- 
> > 2.7.4
J. Bruce Fields Sept. 24, 2016, 3:02 p.m. UTC | #3
On Fri, Sep 23, 2016 at 08:43:44PM -0400, Jeff Layton wrote:
> On Fri, 2016-09-23 at 17:19 -0400, J. Bruce Fields wrote:
> > On Fri, Sep 16, 2016 at 04:28:24PM -0400, Jeff Layton wrote:
> > > 
> > > Create a new per-lockowner+per-inode structure that contains a
> > > file_lock. Have nfsd4_lock add this structure to the lockowner's list
> > > prior to setting the lock. Then call the vfs and request a blocking lock
> > > (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
> > 
> > That doesn't sound right.  FILE_LOCK_DEFERRED is a special case for
> > asynchronous locking--it means "I don't know whether there's a conflict
> > or not, it may take a while to check", not "there's a conflict".
> > 
> > (Ugh.  I apologize for the asynchronous locking code.)
> > 
> > --b.
> > 
> 
> The local file locking code definitely uses this return code to mean
> "This lock is blocked, and I'll call your lm_notify when it's
> unblocked", which is exactly what we rely on here.
> 
> There is a place that uses it in the way you mention though, and that is
> when DLM and svc lockd are interacting via dlm_posix_lock(). lockd can't
> be in play here since this is all NFSv4, so I think the code does handle
> this correctly.

Got it, my apologies!  I'll read some more....

The patches look fine as far as I can tell.

> That said...maybe should probably think about some way to disambiguate
> those two states in the return code. It is horribly confusing.

Yes.

Honestly maybe the asynchronous dlm case just shouldn't be there.  I
remember thinking multithreading lockd would accomplish the same.  But
maybe we already have that in the nfsv4 case, in which case who cares.
(Well, except maybe the locking effectively serializes locking anyway, I
haven't looked.)

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton Sept. 24, 2016, 4:48 p.m. UTC | #4
On Sat, 2016-09-24 at 11:02 -0400, J. Bruce Fields wrote:
> On Fri, Sep 23, 2016 at 08:43:44PM -0400, Jeff Layton wrote:
> > 
> > On Fri, 2016-09-23 at 17:19 -0400, J. Bruce Fields wrote:
> > > 
> > > On Fri, Sep 16, 2016 at 04:28:24PM -0400, Jeff Layton wrote:
> > > > 
> > > > 
> > > > Create a new per-lockowner+per-inode structure that contains a
> > > > file_lock. Have nfsd4_lock add this structure to the lockowner's list
> > > > prior to setting the lock. Then call the vfs and request a blocking lock
> > > > (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
> > > 
> > > That doesn't sound right.  FILE_LOCK_DEFERRED is a special case for
> > > asynchronous locking--it means "I don't know whether there's a conflict
> > > or not, it may take a while to check", not "there's a conflict".
> > > 
> > > (Ugh.  I apologize for the asynchronous locking code.)
> > > 
> > > --b.
> > > 
> > 
> > The local file locking code definitely uses this return code to mean
> > "This lock is blocked, and I'll call your lm_notify when it's
> > unblocked", which is exactly what we rely on here.
> > 
> > There is a place that uses it in the way you mention though, and that is
> > when DLM and svc lockd are interacting via dlm_posix_lock(). lockd can't
> > be in play here since this is all NFSv4, so I think the code does handle
> > this correctly.
> 
> Got it, my apologies!  I'll read some more....
> 
> The patches look fine as far as I can tell.
> 

No worries. It is confusing code, especially once lockd is in the mix.
Thanks for having a look at the set.

> > 
> > That said...maybe should probably think about some way to disambiguate
> > those two states in the return code. It is horribly confusing.
> 
> Yes.
> 
> Honestly maybe the asynchronous dlm case just shouldn't be there.  I
> remember thinking multithreading lockd would accomplish the same.  But
> maybe we already have that in the nfsv4 case, in which case who cares.
> (Well, except maybe the locking effectively serializes locking anyway, I
> haven't looked.)
> 

Maybe, but it's hard to know who is currently relying on this, and as
far as I can tell, it's not broken. I'd hate to remove the functionality
without some way to gauge that.

What I was thinking was that we could add a new FILE_LOCK_BLOCKED error
code and use that everywhere but the DLM case. The DLM case could then
use FILE_LOCK_DEFERRED to convey the situation you mentioned before.

That said, I'd rather not do that in the context of this set. I'd need
to spend some time in the lockd code especially, to make sure that that 
wouldn't break anything.
diff mbox

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a204d7e109d4..ca0db4974e5b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -99,6 +99,7 @@  static struct kmem_cache *odstate_slab;
 static void free_session(struct nfsd4_session *);
 
 static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
 
 static bool is_session_dead(struct nfsd4_session *ses)
 {
@@ -210,6 +211,84 @@  static void nfsd4_put_session(struct nfsd4_session *ses)
 	spin_unlock(&nn->client_lock);
 }
 
+static struct nfsd4_blocked_lock *
+find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+			struct nfsd_net *nn)
+{
+	struct nfsd4_blocked_lock *cur, *found = NULL;
+
+	spin_lock(&nn->client_lock);
+	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
+		if (fh_match(fh, &cur->nbl_fh)) {
+			list_del_init(&cur->nbl_list);
+			found = cur;
+			break;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	if (found)
+		posix_unblock_lock(&found->nbl_lock);
+	return found;
+}
+
+static struct nfsd4_blocked_lock *
+find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+			struct nfsd_net *nn)
+{
+	struct nfsd4_blocked_lock *nbl;
+
+	nbl = find_blocked_lock(lo, fh, nn);
+	if (!nbl) {
+		nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
+		if (nbl) {
+			fh_copy_shallow(&nbl->nbl_fh, fh);
+			locks_init_lock(&nbl->nbl_lock);
+			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
+					&nfsd4_cb_notify_lock_ops,
+					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
+		}
+	}
+	return nbl;
+}
+
+static void
+free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+{
+	locks_release_private(&nbl->nbl_lock);
+	kfree(nbl);
+}
+
+static int
+nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+	/*
+	 * Since this is just an optimization, we don't try very hard if it
+	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
+	 * just quit trying on anything else.
+	 */
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+		rpc_delay(task, 1 * HZ);
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void
+nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
+{
+	struct nfsd4_blocked_lock	*nbl = container_of(cb,
+						struct nfsd4_blocked_lock, nbl_cb);
+
+	free_blocked_lock(nbl);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+	.done		= nfsd4_cb_notify_lock_done,
+	.release	= nfsd4_cb_notify_lock_release,
+};
+
 static inline struct nfs4_stateowner *
 nfs4_get_stateowner(struct nfs4_stateowner *sop)
 {
@@ -5309,7 +5388,29 @@  nfsd4_fl_put_owner(fl_owner_t owner)
 		nfs4_put_stateowner(&lo->lo_owner);
 }
 
+static void
+nfsd4_lm_notify(struct file_lock *fl)
+{
+	struct nfs4_lockowner		*lo = (struct nfs4_lockowner *)fl->fl_owner;
+	struct net			*net = lo->lo_owner.so_client->net;
+	struct nfsd_net			*nn = net_generic(net, nfsd_net_id);
+	struct nfsd4_blocked_lock	*nbl = container_of(fl,
+						struct nfsd4_blocked_lock, nbl_lock);
+	bool queue = false;
+
+	spin_lock(&nn->client_lock);
+	if (!list_empty(&nbl->nbl_list)) {
+		list_del_init(&nbl->nbl_list);
+		queue = true;
+	}
+	spin_unlock(&nn->client_lock);
+
+	if (queue)
+		nfsd4_run_cb(&nbl->nbl_cb);
+}
+
 static const struct lock_manager_operations nfsd_posix_mng_ops  = {
+	.lm_notify = nfsd4_lm_notify,
 	.lm_get_owner = nfsd4_fl_get_owner,
 	.lm_put_owner = nfsd4_fl_put_owner,
 };
@@ -5407,6 +5508,7 @@  alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
 	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
 	if (!lo)
 		return NULL;
+	INIT_LIST_HEAD(&lo->lo_blocked);
 	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
 	lo->lo_owner.so_is_open_owner = 0;
 	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
@@ -5588,12 +5690,15 @@  nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfs4_ol_stateid *open_stp = NULL;
 	struct nfs4_file *fp;
 	struct file *filp = NULL;
+	struct nfsd4_blocked_lock *nbl = NULL;
 	struct file_lock *file_lock = NULL;
 	struct file_lock *conflock = NULL;
 	__be32 status = 0;
 	int lkflg;
 	int err;
 	bool new = false;
+	unsigned char fl_type;
+	unsigned int fl_flags = FL_POSIX;
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
@@ -5658,46 +5763,55 @@  nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (!locks_in_grace(net) && lock->lk_reclaim)
 		goto out;
 
-	file_lock = locks_alloc_lock();
-	if (!file_lock) {
-		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
-		status = nfserr_jukebox;
-		goto out;
-	}
-
 	fp = lock_stp->st_stid.sc_file;
 	switch (lock->lk_type) {
-		case NFS4_READ_LT:
 		case NFS4_READW_LT:
+			if (nfsd4_has_session(cstate))
+				fl_flags |= FL_SLEEP;
+			/* Fallthrough */
+		case NFS4_READ_LT:
 			spin_lock(&fp->fi_lock);
 			filp = find_readable_file_locked(fp);
 			if (filp)
 				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
 			spin_unlock(&fp->fi_lock);
-			file_lock->fl_type = F_RDLCK;
+			fl_type = F_RDLCK;
 			break;
-		case NFS4_WRITE_LT:
 		case NFS4_WRITEW_LT:
+			if (nfsd4_has_session(cstate))
+				fl_flags |= FL_SLEEP;
+			/* Fallthrough */
+		case NFS4_WRITE_LT:
 			spin_lock(&fp->fi_lock);
 			filp = find_writeable_file_locked(fp);
 			if (filp)
 				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
 			spin_unlock(&fp->fi_lock);
-			file_lock->fl_type = F_WRLCK;
+			fl_type = F_WRLCK;
 			break;
 		default:
 			status = nfserr_inval;
 		goto out;
 	}
+
 	if (!filp) {
 		status = nfserr_openmode;
 		goto out;
 	}
 
+	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
+	if (!nbl) {
+		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
+		status = nfserr_jukebox;
+		goto out;
+	}
+
+	file_lock = &nbl->nbl_lock;
+	file_lock->fl_type = fl_type;
 	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
 	file_lock->fl_pid = current->tgid;
 	file_lock->fl_file = filp;
-	file_lock->fl_flags = FL_POSIX;
+	file_lock->fl_flags = fl_flags;
 	file_lock->fl_lmops = &nfsd_posix_mng_ops;
 	file_lock->fl_start = lock->lk_offset;
 	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
@@ -5710,18 +5824,27 @@  nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
 
+	if (fl_flags & FL_SLEEP) {
+		spin_lock(&nn->client_lock);
+		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
+		spin_unlock(&nn->client_lock);
+	}
+
 	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
-	switch (-err) {
+	switch (err) {
 	case 0: /* success! */
 		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
 		status = 0;
 		break;
-	case (EAGAIN):		/* conflock holds conflicting lock */
+	case FILE_LOCK_DEFERRED:
+		nbl = NULL;
+		/* Fallthrough */
+	case -EAGAIN:		/* conflock holds conflicting lock */
 		status = nfserr_denied;
 		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
 		nfs4_set_lock_denied(conflock, &lock->lk_denied);
 		break;
-	case (EDEADLK):
+	case -EDEADLK:
 		status = nfserr_deadlock;
 		break;
 	default:
@@ -5730,6 +5853,15 @@  nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		break;
 	}
 out:
+	if (nbl) {
+		/* dequeue it if we queued it before */
+		if (fl_flags & FL_SLEEP) {
+			spin_lock(&nn->client_lock);
+			list_del_init(&nbl->nbl_list);
+			spin_unlock(&nn->client_lock);
+		}
+		free_blocked_lock(nbl);
+	}
 	if (filp)
 		fput(filp);
 	if (lock_stp) {
@@ -5753,8 +5885,6 @@  out:
 	if (open_stp)
 		nfs4_put_stid(&open_stp->st_stid);
 	nfsd4_bump_seqid(cstate, status);
-	if (file_lock)
-		locks_free_lock(file_lock);
 	if (conflock)
 		locks_free_lock(conflock);
 	return status;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 88d029dd13aa..e45c183a8bf7 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -440,11 +440,11 @@  struct nfs4_openowner {
 /*
  * Represents a generic "lockowner". Similar to an openowner. References to it
  * are held by the lock stateids that are created on its behalf. This object is
- * a superset of the nfs4_stateowner struct (or would be if it needed any extra
- * fields).
+ * a superset of the nfs4_stateowner struct.
  */
 struct nfs4_lockowner {
-	struct nfs4_stateowner	lo_owner; /* must be first element */
+	struct nfs4_stateowner	lo_owner;	/* must be first element */
+	struct list_head	lo_blocked;	/* blocked file_locks */
 };
 
 static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -580,7 +580,13 @@  static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
 	return (s32)(a->si_generation - b->si_generation) > 0;
 }
 
+/*
+ * When a client tries to get a lock on a file, we set one of these objects
+ * on the blocking lock. When the lock becomes free, we can then issue a
+ * CB_NOTIFY_LOCK to the client.
+ */
 struct nfsd4_blocked_lock {
+	struct list_head	nbl_list;
 	struct file_lock	nbl_lock;
 	struct knfsd_fh		nbl_fh;
 	struct nfsd4_callback	nbl_cb;