diff mbox

[RFC,03/27] pnfs: layoutreturn

Message ID 1303320392-21068-1-git-send-email-bhalevy@panasas.com (mailing list archive)
State New, archived
Headers show

Commit Message

Benny Halevy April 20, 2011, 5:26 p.m. UTC
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfs/inode.c          |    3 +-
 fs/nfs/nfs4proc.c       |   97 ++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        |  118 ++++++++++++++++++++++++++++++++++++++++++++--
 fs/nfs/pnfs.c           |   96 ++++++++++++++++++++++++++++++++++++++
 fs/nfs/pnfs.h           |   22 +++++++++
 include/linux/nfs4.h    |    1 +
 include/linux/nfs_xdr.h |   23 +++++++++
 7 files changed, 353 insertions(+), 7 deletions(-)

Comments

Trond Myklebust April 20, 2011, 7:53 p.m. UTC | #1
On Wed, 2011-04-20 at 20:26 +0300, Benny Halevy wrote:
> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
> Signed-off-by: Andy Adamson <andros@citi.umich.edu>
> Signed-off-by: Andy Adamson <andros@netapp.com>
> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com>
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> Signed-off-by: Fred Isaman <iisaman@netapp.com>
> Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
> Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> ---
>  fs/nfs/inode.c          |    3 +-
>  fs/nfs/nfs4proc.c       |   97 ++++++++++++++++++++++++++++++++++++++
>  fs/nfs/nfs4xdr.c        |  118 ++++++++++++++++++++++++++++++++++++++++++++--
>  fs/nfs/pnfs.c           |   96 ++++++++++++++++++++++++++++++++++++++
>  fs/nfs/pnfs.h           |   22 +++++++++
>  include/linux/nfs4.h    |    1 +
>  include/linux/nfs_xdr.h |   23 +++++++++
>  7 files changed, 353 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 57bb31a..73a2529 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
>   */
>  void nfs4_evict_inode(struct inode *inode)
>  {
> -	pnfs_destroy_layout(NFS_I(inode));
> +	pnfs_return_layout(inode, NULL, true);

Why does this want to come before the call to truncate_inode_pages()?
Is there any reason not to put pnfs_return_layout() and
pnfs_destroy_layout into a single helper here?

>  	truncate_inode_pages(&inode->i_data, 0);
>  	end_writeback(inode);
> +	pnfs_destroy_layout(NFS_I(inode));
>  	/* If we are holding a delegation, return it! */
>  	nfs_inode_return_delegation_noreclaim(inode);
>  	/* First call standard NFS clear_inode() code */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 9bf41ea..b03defb 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -5662,6 +5662,103 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
>  	return status;
>  }
>  
> +static void
> +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
> +{
> +	struct nfs4_layoutreturn *lrp = calldata;
> +
> +	dprintk("--> %s\n", __func__);
> +	if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
> +				&lrp->res.seq_res, 0, task))
> +		return;
> +	rpc_call_start(task);
> +}
> +
> +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
> +{
> +	struct nfs4_layoutreturn *lrp = calldata;
> +	struct nfs_server *server;
> +
> +	dprintk("--> %s\n", __func__);
> +
> +	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
> +		return;
> +
> +	if (lrp->args.return_type == RETURN_FILE)
> +		server = NFS_SERVER(lrp->args.inode);
> +	else
> +		server = NULL;
> +	if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
> +		nfs_restart_rpc(task, lrp->clp);
> +		return;
> +	}
> +	if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) {
> +		struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
> +
> +		spin_lock(&lo->plh_inode->i_lock);
> +		if (lrp->res.lrs_present)
> +			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
> +		else
> +			BUG_ON(!list_empty(&lo->plh_segs));
> +		spin_unlock(&lo->plh_inode->i_lock);
> +	}
> +	dprintk("<-- %s\n", __func__);
> +}
> +
> +static void nfs4_layoutreturn_release(void *calldata)
> +{
> +	struct nfs4_layoutreturn *lrp = calldata;
> +
> +	dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
> +	if (lrp->args.return_type == RETURN_FILE) {
> +		struct inode *ino = lrp->args.inode;
> +		struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
> +
> +		put_layout_hdr(lo);
> +	}
> +	kfree(calldata);
> +	dprintk("<-- %s\n", __func__);
> +}
> +
> +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
> +	.rpc_call_prepare = nfs4_layoutreturn_prepare,
> +	.rpc_call_done = nfs4_layoutreturn_done,
> +	.rpc_release = nfs4_layoutreturn_release,
> +};
> +
> +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)

Why the 'issync' parameter?

> +{
> +	struct rpc_task *task;
> +	struct rpc_message msg = {
> +		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
> +		.rpc_argp = &lrp->args,
> +		.rpc_resp = &lrp->res,
> +	};
> +	struct rpc_task_setup task_setup_data = {
> +		.rpc_client = lrp->clp->cl_rpcclient,
> +		.rpc_message = &msg,
> +		.callback_ops = &nfs4_layoutreturn_call_ops,
> +		.callback_data = lrp,
> +		.flags = RPC_TASK_ASYNC,
> +	};
> +	int status = 0;
> +
> +	dprintk("--> %s\n", __func__);
> +	task = rpc_run_task(&task_setup_data);
> +	if (IS_ERR(task))
> +		return PTR_ERR(task);
> +	if (!issync)
> +		goto out;
> +	status = nfs4_wait_for_completion_rpc_task(task);
> +	if (status != 0)
> +		goto out;
> +	status = task->tk_status;
> +out:
> +	dprintk("<-- %s\n", __func__);
> +	rpc_put_task(task);
> +	return status;
> +}
> +
>  static int
>  _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
>  {
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index dddfb57..53ea3e5 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -338,7 +338,12 @@ static int nfs4_stat_to_errno(int);
>  				1 /* layoutupdate4 layout type */ + \
>  				1 /* NULL filelayout layoutupdate4 payload */)
>  #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
> -
> +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
> +				encode_stateid_maxsz + \
> +				1 /* FIXME: opaque lrf_body always empty at
> +				   *the moment */)
> +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
> +				1 + decode_stateid_maxsz)
>  #else /* CONFIG_NFS_V4_1 */
>  #define encode_sequence_maxsz	0
>  #define decode_sequence_maxsz	0
> @@ -760,7 +765,14 @@ static int nfs4_stat_to_errno(int);
>  				decode_putfh_maxsz + \
>  				decode_layoutcommit_maxsz + \
>  				decode_getattr_maxsz)
> -
> +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
> +				encode_sequence_maxsz + \
> +				encode_putfh_maxsz + \
> +				encode_layoutreturn_maxsz)
> +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
> +				decode_sequence_maxsz + \
> +				decode_putfh_maxsz + \
> +				decode_layoutreturn_maxsz)
>  
>  const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
>  				      compound_encode_hdr_maxsz +
> @@ -1890,6 +1902,37 @@ encode_layoutcommit(struct xdr_stream *xdr,
>  	hdr->replen += decode_layoutcommit_maxsz;
>  	return 0;
>  }
> +
> +static void
> +encode_layoutreturn(struct xdr_stream *xdr,
> +		    const struct nfs4_layoutreturn_args *args,
> +		    struct compound_hdr *hdr)
> +{
> +	nfs4_stateid stateid;
> +	__be32 *p;
> +
> +	p = reserve_space(xdr, 20);
> +	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
> +	*p++ = cpu_to_be32(args->reclaim);
> +	*p++ = cpu_to_be32(args->layout_type);
> +	*p++ = cpu_to_be32(args->range.iomode);
> +	*p = cpu_to_be32(args->return_type);
> +	if (args->return_type == RETURN_FILE) {
> +		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
> +		p = xdr_encode_hyper(p, args->range.offset);
> +		p = xdr_encode_hyper(p, args->range.length);
> +		spin_lock(&args->inode->i_lock);
> +		memcpy(stateid.data, NFS_I(args->inode)->layout->plh_stateid.data,
> +		       NFS4_STATEID_SIZE);
> +		spin_unlock(&args->inode->i_lock);
> +		p = xdr_encode_opaque_fixed(p, &stateid.data,
> +					    NFS4_STATEID_SIZE);
> +		p = reserve_space(xdr, 4);
> +		*p = cpu_to_be32(0);
> +	}
> +	hdr->nops++;
> +	hdr->replen += decode_layoutreturn_maxsz;
> +}
>  #endif /* CONFIG_NFS_V4_1 */
>  
>  /*
> @@ -2707,9 +2750,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
>  /*
>   *  Encode LAYOUTCOMMIT request
>   */
> -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
> -				     struct xdr_stream *xdr,
> -				     struct nfs4_layoutcommit_args *args)
> +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
> +				      struct xdr_stream *xdr,
> +				      struct nfs4_layoutcommit_args *args)
>  {
>  	struct compound_hdr hdr = {
>  		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
> @@ -2721,7 +2764,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
>  	encode_layoutcommit(xdr, args, &hdr);
>  	encode_getfattr(xdr, args->bitmask, &hdr);
>  	encode_nops(&hdr);
> -	return 0;
> +}
> +
> +/*
> + * Encode LAYOUTRETURN request
> + */
> +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
> +				      struct xdr_stream *xdr,
> +				      struct nfs4_layoutreturn_args *args)
> +{
> +	struct compound_hdr hdr = {
> +		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
> +	};
> +
> +	encode_compound_hdr(xdr, req, &hdr);
> +	encode_sequence(xdr, &args->seq_args, &hdr);
> +	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
> +	encode_layoutreturn(xdr, args, &hdr);
> +	encode_nops(&hdr);
>  }
>  #endif /* CONFIG_NFS_V4_1 */
>  
> @@ -5202,6 +5262,27 @@ out_overflow:
>  	return -EIO;
>  }
>  
> +static int decode_layoutreturn(struct xdr_stream *xdr,
> +			       struct nfs4_layoutreturn_res *res)
> +{
> +	__be32 *p;
> +	int status;
> +
> +	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
> +	if (status)
> +		return status;
> +	p = xdr_inline_decode(xdr, 4);
> +	if (unlikely(!p))
> +		goto out_overflow;
> +	res->lrs_present = be32_to_cpup(p);
> +	if (res->lrs_present)
> +		status = decode_stateid(xdr, &res->stateid);
> +	return status;
> +out_overflow:
> +	print_overflow_msg(__func__, xdr);
> +	return -EIO;
> +}
> +
>  static int decode_layoutcommit(struct xdr_stream *xdr,
>  			       struct rpc_rqst *req,
>  			       struct nfs4_layoutcommit_res *res)
> @@ -6319,6 +6400,30 @@ out:
>  }
>  
>  /*
> + * Decode LAYOUTRETURN response
> + */
> +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
> +				     struct xdr_stream *xdr,
> +				     struct nfs4_layoutreturn_res *res)
> +{
> +	struct compound_hdr hdr;
> +	int status;
> +
> +	status = decode_compound_hdr(xdr, &hdr);
> +	if (status)
> +		goto out;
> +	status = decode_sequence(xdr, &res->seq_res, rqstp);
> +	if (status)
> +		goto out;
> +	status = decode_putfh(xdr);
> +	if (status)
> +		goto out;
> +	status = decode_layoutreturn(xdr, res);
> +out:
> +	return status;
> +}
> +
> +/*
>   * Decode LAYOUTCOMMIT response
>   */
>  static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
> @@ -6544,6 +6649,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
>  	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
>  	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
>  	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
> +	PROC(LAYOUTRETURN,	enc_layoutreturn,	dec_layoutreturn),
>  #endif /* CONFIG_NFS_V4_1 */
>  };
>  
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index d9ab972..89e7725 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -321,6 +321,36 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
>  	return invalid - removed;
>  }
>  
> +/* Returns false if there was nothing to do, true otherwise */
> +static bool
> +pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
> +		     struct pnfs_layout_range *range)
> +{
> +	struct pnfs_layout_segment *lseg, *next;
> +	bool rv = false;
> +
> +	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
> +		__func__, lo, range->offset, range->length, range->iomode);
> +	assert_spin_locked(&lo->plh_inode->i_lock);

Not needed in the case of nfs4_evict_inode.

> +	if (list_empty(&lo->plh_segs)) {
> +		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
> +			put_layout_hdr_locked(lo);
> +		return 0;
> +	}
> +	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
> +		if (should_free_lseg(lseg->pls_range.iomode, range->iomode)) {
> +			dprintk("%s: freeing lseg %p iomode %d "
> +				"offset %llu length %llu\n", __func__,
> +				lseg, lseg->pls_range.iomode,
> +				lseg->pls_range.offset,
> +				lseg->pls_range.length);
> +			mark_lseg_invalid(lseg, tmp_list);
> +			rv = true;
> +		}
> +	dprintk("%s:Return %d\n", __func__, rv);
> +	return rv;
> +}
> +
>  /* note free_me must contain lsegs from a single layout_hdr */
>  void
>  pnfs_free_lseg_list(struct list_head *free_me)
> @@ -539,6 +569,72 @@ out_err_free:
>  	return NULL;
>  }
>  
> +static int
> +return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait)
> +{
> +	struct nfs4_layoutreturn *lrp;
> +	struct nfs_server *server = NFS_SERVER(ino);
> +	int status = -ENOMEM;
> +
> +	dprintk("--> %s\n", __func__);
> +
> +	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
> +	if (lrp == NULL) {
> +		put_layout_hdr(NFS_I(ino)->layout);
> +		goto out;
> +	}
> +	lrp->args.reclaim = 0;
> +	lrp->args.layout_type = server->pnfs_curr_ld->id;
> +	lrp->args.return_type = RETURN_FILE;
> +	lrp->args.range = *range;
> +	lrp->args.inode = ino;
> +	lrp->clp = server->nfs_client;
> +
> +	status = nfs4_proc_layoutreturn(lrp, wait);
> +out:
> +	dprintk("<-- %s status: %d\n", __func__, status);
> +	return status;
> +}
> +
> +/* Initiates a LAYOUTRETURN(FILE) */
> +int
> +_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait)

What are the 'range' and 'wait' parameters for? We don't have any users
other than nfs4_evict_inode.

> +{
> +	struct pnfs_layout_hdr *lo = NULL;
> +	struct nfs_inode *nfsi = NFS_I(ino);
> +	struct pnfs_layout_range arg;
> +	LIST_HEAD(tmp_list);
> +	int status = 0;
> +
> +	dprintk("--> %s\n", __func__);
> +
> +	arg.iomode = range ? range->iomode : IOMODE_ANY;
> +	arg.offset = 0;
> +	arg.length = NFS4_MAX_UINT64;
> +
> +	spin_lock(&ino->i_lock);
> +	lo = nfsi->layout;
> +	if (!lo || !pnfs_clear_lseg_list(lo, &tmp_list, &arg)) {
> +		spin_unlock(&ino->i_lock);
> +		dprintk("%s: no layout segments to return\n", __func__);
> +		goto out;
> +	}
> +	/* Reference matched in nfs4_layoutreturn_release */
> +	get_layout_hdr(lo);
> +	spin_unlock(&ino->i_lock);
> +	pnfs_free_lseg_list(&tmp_list);
> +
> +	/* Return layout even if layoutcommit fails */
> +	status = pnfs_layoutcommit_inode(ino, wait);

Why is this needed? Again, by the time we get to nfs4_evict_inode, the
inode is guaranteed to be clean.

> +	if (status)
> +		dprintk("%s: layoutcommit failed, status=%d. Returning layout anyway\n",
> +			__func__, status);
> +	status = return_layout(ino, &arg, wait);
> +out:
> +	dprintk("<-- %s status: %d\n", __func__, status);
> +	return status;
> +}
> +
>  bool pnfs_roc(struct inode *ino)
>  {
>  	struct pnfs_layout_hdr *lo;
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 4cb0a0d..a308f3c 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -123,6 +123,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
>  extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
>  				   struct pnfs_device *dev);
>  extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
> +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
>  
>  /* pnfs.c */
>  void get_layout_hdr(struct pnfs_layout_hdr *lo);
> @@ -158,6 +159,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
>  bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
>  void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
>  int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
> +int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *, bool wait);
>  
>  static inline int lo_fail_bit(u32 iomode)
>  {
> @@ -226,6 +228,19 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req)
>  		put_lseg(req->wb_commit_lseg);
>  }
>  
> +static inline int pnfs_return_layout(struct inode *ino,
> +				     struct pnfs_layout_range *range,
> +				     bool wait)
> +{
> +	struct nfs_inode *nfsi = NFS_I(ino);
> +	struct nfs_server *nfss = NFS_SERVER(ino);
> +
> +	if (pnfs_enabled_sb(nfss) && nfsi->layout)
> +		return _pnfs_return_layout(ino, range, wait);
> +
> +	return 0;
> +}
> +
>  #else  /* CONFIG_NFS_V4_1 */
>  
>  static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
> @@ -267,6 +282,13 @@ pnfs_try_to_write_data(struct nfs_write_data *data,
>  	return PNFS_NOT_ATTEMPTED;
>  }
>  
> +static inline int pnfs_return_layout(struct inode *ino,
> +				     struct pnfs_layout_range *range,
> +				     bool wait)
> +{
> +	return 0;
> +}
> +
>  static inline bool
>  pnfs_roc(struct inode *ino)
>  {
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 178fafe..9376eaf 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -562,6 +562,7 @@ enum {
>  	NFSPROC4_CLNT_LAYOUTGET,
>  	NFSPROC4_CLNT_GETDEVICEINFO,
>  	NFSPROC4_CLNT_LAYOUTCOMMIT,
> +	NFSPROC4_CLNT_LAYOUTRETURN,
>  };
>  
>  /* nfs41 types */
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 78b101e..455ddfb 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -266,6 +266,29 @@ struct nfs4_layoutcommit_data {
>  	struct nfs4_layoutcommit_res res;
>  };
>  
> +struct nfs4_layoutreturn_args {
> +	__u32   reclaim;
> +	__u32   layout_type;
> +	__u32   return_type;

Why do we need a 'return_type'? As far as I can see, this will always be
RETURN_FILE.

> +	struct pnfs_layout_range range;
> +	struct inode *inode;
> +	struct nfs4_sequence_args seq_args;
> +};
> +
> +struct nfs4_layoutreturn_res {
> +	struct nfs4_sequence_res seq_res;
> +	u32 lrs_present;
> +	nfs4_stateid stateid;
> +};
> +
> +struct nfs4_layoutreturn {
> +	struct nfs4_layoutreturn_args args;
> +	struct nfs4_layoutreturn_res res;
> +	struct rpc_cred *cred;
> +	struct nfs_client *clp;
> +	int rpc_status;
> +};
> +
>  /*
>   * Arguments to the open call.
>   */
Benny Halevy April 22, 2011, 6:52 a.m. UTC | #2
On 2011-04-20 22:53, Trond Myklebust wrote:
> On Wed, 2011-04-20 at 20:26 +0300, Benny Halevy wrote:
>> @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
>>   */
>>  void nfs4_evict_inode(struct inode *inode)
>>  {
>> -	pnfs_destroy_layout(NFS_I(inode));
>> +	pnfs_return_layout(inode, NULL, true);
> 
> Why does this want to come before the call to truncate_inode_pages()?

Actually, I don't see any good reason. :-/

> Is there any reason not to put pnfs_return_layout() and
> pnfs_destroy_layout into a single helper here?
> 

Looks like an overkill to me for this one call site.

>>  	truncate_inode_pages(&inode->i_data, 0);
>>  	end_writeback(inode);
>> +	pnfs_destroy_layout(NFS_I(inode));
>>  	/* If we are holding a delegation, return it! */
>>  	nfs_inode_return_delegation_noreclaim(inode);
>>  	/* First call standard NFS clear_inode() code */
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 9bf41ea..b03defb 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -5662,6 +5662,103 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
>>  	return status;
>>  }
>>  
>> +static void
>> +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
>> +{
>> +	struct nfs4_layoutreturn *lrp = calldata;
>> +
>> +	dprintk("--> %s\n", __func__);
>> +	if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
>> +				&lrp->res.seq_res, 0, task))
>> +		return;
>> +	rpc_call_start(task);
>> +}
>> +
>> +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
>> +{
>> +	struct nfs4_layoutreturn *lrp = calldata;
>> +	struct nfs_server *server;
>> +
>> +	dprintk("--> %s\n", __func__);
>> +
>> +	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
>> +		return;
>> +
>> +	if (lrp->args.return_type == RETURN_FILE)
>> +		server = NFS_SERVER(lrp->args.inode);
>> +	else
>> +		server = NULL;
>> +	if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
>> +		nfs_restart_rpc(task, lrp->clp);
>> +		return;
>> +	}
>> +	if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) {
>> +		struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>> +
>> +		spin_lock(&lo->plh_inode->i_lock);
>> +		if (lrp->res.lrs_present)
>> +			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
>> +		else
>> +			BUG_ON(!list_empty(&lo->plh_segs));
>> +		spin_unlock(&lo->plh_inode->i_lock);
>> +	}
>> +	dprintk("<-- %s\n", __func__);
>> +}
>> +
>> +static void nfs4_layoutreturn_release(void *calldata)
>> +{
>> +	struct nfs4_layoutreturn *lrp = calldata;
>> +
>> +	dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
>> +	if (lrp->args.return_type == RETURN_FILE) {
>> +		struct inode *ino = lrp->args.inode;
>> +		struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
>> +
>> +		put_layout_hdr(lo);
>> +	}
>> +	kfree(calldata);
>> +	dprintk("<-- %s\n", __func__);
>> +}
>> +
>> +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
>> +	.rpc_call_prepare = nfs4_layoutreturn_prepare,
>> +	.rpc_call_done = nfs4_layoutreturn_done,
>> +	.rpc_release = nfs4_layoutreturn_release,
>> +};
>> +
>> +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
> 
> Why the 'issync' parameter?
> 

Hmm, you're right.  Currently all call sites use it as sync.

>> +{
>> +	struct rpc_task *task;
>> +	struct rpc_message msg = {
>> +		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
>> +		.rpc_argp = &lrp->args,
>> +		.rpc_resp = &lrp->res,
>> +	};
>> +	struct rpc_task_setup task_setup_data = {
>> +		.rpc_client = lrp->clp->cl_rpcclient,
>> +		.rpc_message = &msg,
>> +		.callback_ops = &nfs4_layoutreturn_call_ops,
>> +		.callback_data = lrp,
>> +		.flags = RPC_TASK_ASYNC,
>> +	};
>> +	int status = 0;
>> +
>> +	dprintk("--> %s\n", __func__);
>> +	task = rpc_run_task(&task_setup_data);
>> +	if (IS_ERR(task))
>> +		return PTR_ERR(task);
>> +	if (!issync)
>> +		goto out;
>> +	status = nfs4_wait_for_completion_rpc_task(task);
>> +	if (status != 0)
>> +		goto out;
>> +	status = task->tk_status;
>> +out:
>> +	dprintk("<-- %s\n", __func__);
>> +	rpc_put_task(task);
>> +	return status;
>> +}
>> +
>>  static int
>>  _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
>>  {
>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> index dddfb57..53ea3e5 100644
>> --- a/fs/nfs/nfs4xdr.c
>> +++ b/fs/nfs/nfs4xdr.c
>> @@ -338,7 +338,12 @@ static int nfs4_stat_to_errno(int);
>>  				1 /* layoutupdate4 layout type */ + \
>>  				1 /* NULL filelayout layoutupdate4 payload */)
>>  #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
>> -
>> +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
>> +				encode_stateid_maxsz + \
>> +				1 /* FIXME: opaque lrf_body always empty at
>> +				   *the moment */)
>> +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
>> +				1 + decode_stateid_maxsz)
>>  #else /* CONFIG_NFS_V4_1 */
>>  #define encode_sequence_maxsz	0
>>  #define decode_sequence_maxsz	0
>> @@ -760,7 +765,14 @@ static int nfs4_stat_to_errno(int);
>>  				decode_putfh_maxsz + \
>>  				decode_layoutcommit_maxsz + \
>>  				decode_getattr_maxsz)
>> -
>> +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
>> +				encode_sequence_maxsz + \
>> +				encode_putfh_maxsz + \
>> +				encode_layoutreturn_maxsz)
>> +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
>> +				decode_sequence_maxsz + \
>> +				decode_putfh_maxsz + \
>> +				decode_layoutreturn_maxsz)
>>  
>>  const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
>>  				      compound_encode_hdr_maxsz +
>> @@ -1890,6 +1902,37 @@ encode_layoutcommit(struct xdr_stream *xdr,
>>  	hdr->replen += decode_layoutcommit_maxsz;
>>  	return 0;
>>  }
>> +
>> +static void
>> +encode_layoutreturn(struct xdr_stream *xdr,
>> +		    const struct nfs4_layoutreturn_args *args,
>> +		    struct compound_hdr *hdr)
>> +{
>> +	nfs4_stateid stateid;
>> +	__be32 *p;
>> +
>> +	p = reserve_space(xdr, 20);
>> +	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
>> +	*p++ = cpu_to_be32(args->reclaim);
>> +	*p++ = cpu_to_be32(args->layout_type);
>> +	*p++ = cpu_to_be32(args->range.iomode);
>> +	*p = cpu_to_be32(args->return_type);
>> +	if (args->return_type == RETURN_FILE) {
>> +		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>> +		p = xdr_encode_hyper(p, args->range.offset);
>> +		p = xdr_encode_hyper(p, args->range.length);
>> +		spin_lock(&args->inode->i_lock);
>> +		memcpy(stateid.data, NFS_I(args->inode)->layout->plh_stateid.data,
>> +		       NFS4_STATEID_SIZE);
>> +		spin_unlock(&args->inode->i_lock);
>> +		p = xdr_encode_opaque_fixed(p, &stateid.data,
>> +					    NFS4_STATEID_SIZE);
>> +		p = reserve_space(xdr, 4);
>> +		*p = cpu_to_be32(0);
>> +	}
>> +	hdr->nops++;
>> +	hdr->replen += decode_layoutreturn_maxsz;
>> +}
>>  #endif /* CONFIG_NFS_V4_1 */
>>  
>>  /*
>> @@ -2707,9 +2750,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
>>  /*
>>   *  Encode LAYOUTCOMMIT request
>>   */
>> -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
>> -				     struct xdr_stream *xdr,
>> -				     struct nfs4_layoutcommit_args *args)
>> +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
>> +				      struct xdr_stream *xdr,
>> +				      struct nfs4_layoutcommit_args *args)
>>  {
>>  	struct compound_hdr hdr = {
>>  		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
>> @@ -2721,7 +2764,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
>>  	encode_layoutcommit(xdr, args, &hdr);
>>  	encode_getfattr(xdr, args->bitmask, &hdr);
>>  	encode_nops(&hdr);
>> -	return 0;
>> +}
>> +
>> +/*
>> + * Encode LAYOUTRETURN request
>> + */
>> +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
>> +				      struct xdr_stream *xdr,
>> +				      struct nfs4_layoutreturn_args *args)
>> +{
>> +	struct compound_hdr hdr = {
>> +		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
>> +	};
>> +
>> +	encode_compound_hdr(xdr, req, &hdr);
>> +	encode_sequence(xdr, &args->seq_args, &hdr);
>> +	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
>> +	encode_layoutreturn(xdr, args, &hdr);
>> +	encode_nops(&hdr);
>>  }
>>  #endif /* CONFIG_NFS_V4_1 */
>>  
>> @@ -5202,6 +5262,27 @@ out_overflow:
>>  	return -EIO;
>>  }
>>  
>> +static int decode_layoutreturn(struct xdr_stream *xdr,
>> +			       struct nfs4_layoutreturn_res *res)
>> +{
>> +	__be32 *p;
>> +	int status;
>> +
>> +	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
>> +	if (status)
>> +		return status;
>> +	p = xdr_inline_decode(xdr, 4);
>> +	if (unlikely(!p))
>> +		goto out_overflow;
>> +	res->lrs_present = be32_to_cpup(p);
>> +	if (res->lrs_present)
>> +		status = decode_stateid(xdr, &res->stateid);
>> +	return status;
>> +out_overflow:
>> +	print_overflow_msg(__func__, xdr);
>> +	return -EIO;
>> +}
>> +
>>  static int decode_layoutcommit(struct xdr_stream *xdr,
>>  			       struct rpc_rqst *req,
>>  			       struct nfs4_layoutcommit_res *res)
>> @@ -6319,6 +6400,30 @@ out:
>>  }
>>  
>>  /*
>> + * Decode LAYOUTRETURN response
>> + */
>> +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
>> +				     struct xdr_stream *xdr,
>> +				     struct nfs4_layoutreturn_res *res)
>> +{
>> +	struct compound_hdr hdr;
>> +	int status;
>> +
>> +	status = decode_compound_hdr(xdr, &hdr);
>> +	if (status)
>> +		goto out;
>> +	status = decode_sequence(xdr, &res->seq_res, rqstp);
>> +	if (status)
>> +		goto out;
>> +	status = decode_putfh(xdr);
>> +	if (status)
>> +		goto out;
>> +	status = decode_layoutreturn(xdr, res);
>> +out:
>> +	return status;
>> +}
>> +
>> +/*
>>   * Decode LAYOUTCOMMIT response
>>   */
>>  static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
>> @@ -6544,6 +6649,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
>>  	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
>>  	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
>>  	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
>> +	PROC(LAYOUTRETURN,	enc_layoutreturn,	dec_layoutreturn),
>>  #endif /* CONFIG_NFS_V4_1 */
>>  };
>>  
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index d9ab972..89e7725 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -321,6 +321,36 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
>>  	return invalid - removed;
>>  }
>>  
>> +/* Returns false if there was nothing to do, true otherwise */
>> +static bool
>> +pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>> +		     struct pnfs_layout_range *range)
>> +{
>> +	struct pnfs_layout_segment *lseg, *next;
>> +	bool rv = false;
>> +
>> +	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
>> +		__func__, lo, range->offset, range->length, range->iomode);
>> +	assert_spin_locked(&lo->plh_inode->i_lock);
> 
> Not needed in the case of nfs4_evict_inode.
> 

True.  I can remove the assert
but the lock better be taken if called while the inode is alive.

>> +	if (list_empty(&lo->plh_segs)) {
>> +		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
>> +			put_layout_hdr_locked(lo);
>> +		return 0;
>> +	}
>> +	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
>> +		if (should_free_lseg(lseg->pls_range.iomode, range->iomode)) {
>> +			dprintk("%s: freeing lseg %p iomode %d "
>> +				"offset %llu length %llu\n", __func__,
>> +				lseg, lseg->pls_range.iomode,
>> +				lseg->pls_range.offset,
>> +				lseg->pls_range.length);
>> +			mark_lseg_invalid(lseg, tmp_list);
>> +			rv = true;
>> +		}
>> +	dprintk("%s:Return %d\n", __func__, rv);
>> +	return rv;
>> +}
>> +
>>  /* note free_me must contain lsegs from a single layout_hdr */
>>  void
>>  pnfs_free_lseg_list(struct list_head *free_me)
>> @@ -539,6 +569,72 @@ out_err_free:
>>  	return NULL;
>>  }
>>  
>> +static int
>> +return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait)
>> +{
>> +	struct nfs4_layoutreturn *lrp;
>> +	struct nfs_server *server = NFS_SERVER(ino);
>> +	int status = -ENOMEM;
>> +
>> +	dprintk("--> %s\n", __func__);
>> +
>> +	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
>> +	if (lrp == NULL) {
>> +		put_layout_hdr(NFS_I(ino)->layout);
>> +		goto out;
>> +	}
>> +	lrp->args.reclaim = 0;
>> +	lrp->args.layout_type = server->pnfs_curr_ld->id;
>> +	lrp->args.return_type = RETURN_FILE;
>> +	lrp->args.range = *range;
>> +	lrp->args.inode = ino;
>> +	lrp->clp = server->nfs_client;
>> +
>> +	status = nfs4_proc_layoutreturn(lrp, wait);
>> +out:
>> +	dprintk("<-- %s status: %d\n", __func__, status);
>> +	return status;
>> +}
>> +
>> +/* Initiates a LAYOUTRETURN(FILE) */
>> +int
>> +_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait)
> 
> What are the 'range' and 'wait' parameters for? We don't have any users
> other than nfs4_evict_inode.
> 

OK, I'll introduce these with their usage.

>> +{
>> +	struct pnfs_layout_hdr *lo = NULL;
>> +	struct nfs_inode *nfsi = NFS_I(ino);
>> +	struct pnfs_layout_range arg;
>> +	LIST_HEAD(tmp_list);
>> +	int status = 0;
>> +
>> +	dprintk("--> %s\n", __func__);
>> +
>> +	arg.iomode = range ? range->iomode : IOMODE_ANY;
>> +	arg.offset = 0;
>> +	arg.length = NFS4_MAX_UINT64;
>> +
>> +	spin_lock(&ino->i_lock);
>> +	lo = nfsi->layout;
>> +	if (!lo || !pnfs_clear_lseg_list(lo, &tmp_list, &arg)) {
>> +		spin_unlock(&ino->i_lock);
>> +		dprintk("%s: no layout segments to return\n", __func__);
>> +		goto out;
>> +	}
>> +	/* Reference matched in nfs4_layoutreturn_release */
>> +	get_layout_hdr(lo);
>> +	spin_unlock(&ino->i_lock);
>> +	pnfs_free_lseg_list(&tmp_list);
>> +
>> +	/* Return layout even if layoutcommit fails */
>> +	status = pnfs_layoutcommit_inode(ino, wait);
> 
> Why is this needed? Again, by the time we get to nfs4_evict_inode, the
> inode is guaranteed to be clean.
> 

OK, let me just add a WARN_ON if we get here and layout commit is
required. Does that work for you?

>> +	if (status)
>> +		dprintk("%s: layoutcommit failed, status=%d. Returning layout anyway\n",
>> +			__func__, status);
>> +	status = return_layout(ino, &arg, wait);
>> +out:
>> +	dprintk("<-- %s status: %d\n", __func__, status);
>> +	return status;
>> +}
>> +
>>  bool pnfs_roc(struct inode *ino)
>>  {
>>  	struct pnfs_layout_hdr *lo;
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 4cb0a0d..a308f3c 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -123,6 +123,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
>>  extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
>>  				   struct pnfs_device *dev);
>>  extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
>> +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
>>  
>>  /* pnfs.c */
>>  void get_layout_hdr(struct pnfs_layout_hdr *lo);
>> @@ -158,6 +159,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
>>  bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
>>  void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
>>  int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
>> +int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *, bool wait);
>>  
>>  static inline int lo_fail_bit(u32 iomode)
>>  {
>> @@ -226,6 +228,19 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req)
>>  		put_lseg(req->wb_commit_lseg);
>>  }
>>  
>> +static inline int pnfs_return_layout(struct inode *ino,
>> +				     struct pnfs_layout_range *range,
>> +				     bool wait)
>> +{
>> +	struct nfs_inode *nfsi = NFS_I(ino);
>> +	struct nfs_server *nfss = NFS_SERVER(ino);
>> +
>> +	if (pnfs_enabled_sb(nfss) && nfsi->layout)
>> +		return _pnfs_return_layout(ino, range, wait);
>> +
>> +	return 0;
>> +}
>> +
>>  #else  /* CONFIG_NFS_V4_1 */
>>  
>>  static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>> @@ -267,6 +282,13 @@ pnfs_try_to_write_data(struct nfs_write_data *data,
>>  	return PNFS_NOT_ATTEMPTED;
>>  }
>>  
>> +static inline int pnfs_return_layout(struct inode *ino,
>> +				     struct pnfs_layout_range *range,
>> +				     bool wait)
>> +{
>> +	return 0;
>> +}
>> +
>>  static inline bool
>>  pnfs_roc(struct inode *ino)
>>  {
>> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
>> index 178fafe..9376eaf 100644
>> --- a/include/linux/nfs4.h
>> +++ b/include/linux/nfs4.h
>> @@ -562,6 +562,7 @@ enum {
>>  	NFSPROC4_CLNT_LAYOUTGET,
>>  	NFSPROC4_CLNT_GETDEVICEINFO,
>>  	NFSPROC4_CLNT_LAYOUTCOMMIT,
>> +	NFSPROC4_CLNT_LAYOUTRETURN,
>>  };
>>  
>>  /* nfs41 types */
>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>> index 78b101e..455ddfb 100644
>> --- a/include/linux/nfs_xdr.h
>> +++ b/include/linux/nfs_xdr.h
>> @@ -266,6 +266,29 @@ struct nfs4_layoutcommit_data {
>>  	struct nfs4_layoutcommit_res res;
>>  };
>>  
>> +struct nfs4_layoutreturn_args {
>> +	__u32   reclaim;
>> +	__u32   layout_type;
>> +	__u32   return_type;
> 
> Why do we need a 'return_type'? As far as I can see, this will always be
> RETURN_FILE.
> 

Yeah, we can remove this for now.

Benny

>> +	struct pnfs_layout_range range;
>> +	struct inode *inode;
>> +	struct nfs4_sequence_args seq_args;
>> +};
>> +
>> +struct nfs4_layoutreturn_res {
>> +	struct nfs4_sequence_res seq_res;
>> +	u32 lrs_present;
>> +	nfs4_stateid stateid;
>> +};
>> +
>> +struct nfs4_layoutreturn {
>> +	struct nfs4_layoutreturn_args args;
>> +	struct nfs4_layoutreturn_res res;
>> +	struct rpc_cred *cred;
>> +	struct nfs_client *clp;
>> +	int rpc_status;
>> +};
>> +
>>  /*
>>   * Arguments to the open call.
>>   */
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 57bb31a..73a2529 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1424,9 +1424,10 @@  static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  */
 void nfs4_evict_inode(struct inode *inode)
 {
-	pnfs_destroy_layout(NFS_I(inode));
+	pnfs_return_layout(inode, NULL, true);
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
+	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9bf41ea..b03defb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5662,6 +5662,103 @@  int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
 	return status;
 }
 
+static void
+nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutreturn *lrp = calldata;
+
+	dprintk("--> %s\n", __func__);
+	if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
+				&lrp->res.seq_res, 0, task))
+		return;
+	rpc_call_start(task);
+}
+
+static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutreturn *lrp = calldata;
+	struct nfs_server *server;
+
+	dprintk("--> %s\n", __func__);
+
+	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
+		return;
+
+	if (lrp->args.return_type == RETURN_FILE)
+		server = NFS_SERVER(lrp->args.inode);
+	else
+		server = NULL;
+	if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+		nfs_restart_rpc(task, lrp->clp);
+		return;
+	}
+	if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) {
+		struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+
+		spin_lock(&lo->plh_inode->i_lock);
+		if (lrp->res.lrs_present)
+			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+		else
+			BUG_ON(!list_empty(&lo->plh_segs));
+		spin_unlock(&lo->plh_inode->i_lock);
+	}
+	dprintk("<-- %s\n", __func__);
+}
+
+static void nfs4_layoutreturn_release(void *calldata)
+{
+	struct nfs4_layoutreturn *lrp = calldata;
+
+	dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
+	if (lrp->args.return_type == RETURN_FILE) {
+		struct inode *ino = lrp->args.inode;
+		struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
+
+		put_layout_hdr(lo);
+	}
+	kfree(calldata);
+	dprintk("<-- %s\n", __func__);
+}
+
+static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
+	.rpc_call_prepare = nfs4_layoutreturn_prepare,
+	.rpc_call_done = nfs4_layoutreturn_done,
+	.rpc_release = nfs4_layoutreturn_release,
+};
+
+int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
+{
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
+		.rpc_argp = &lrp->args,
+		.rpc_resp = &lrp->res,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = lrp->clp->cl_rpcclient,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_layoutreturn_call_ops,
+		.callback_data = lrp,
+		.flags = RPC_TASK_ASYNC,
+	};
+	int status = 0;
+
+	dprintk("--> %s\n", __func__);
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	if (!issync)
+		goto out;
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0)
+		goto out;
+	status = task->tk_status;
+out:
+	dprintk("<-- %s\n", __func__);
+	rpc_put_task(task);
+	return status;
+}
+
 static int
 _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
 {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dddfb57..53ea3e5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -338,7 +338,12 @@  static int nfs4_stat_to_errno(int);
 				1 /* layoutupdate4 layout type */ + \
 				1 /* NULL filelayout layoutupdate4 payload */)
 #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
-
+#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
+				encode_stateid_maxsz + \
+				1 /* FIXME: opaque lrf_body always empty at
+				   *the moment */)
+#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
+				1 + decode_stateid_maxsz)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -760,7 +765,14 @@  static int nfs4_stat_to_errno(int);
 				decode_putfh_maxsz + \
 				decode_layoutcommit_maxsz + \
 				decode_getattr_maxsz)
-
+#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_layoutreturn_maxsz)
+#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_layoutreturn_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				      compound_encode_hdr_maxsz +
@@ -1890,6 +1902,37 @@  encode_layoutcommit(struct xdr_stream *xdr,
 	hdr->replen += decode_layoutcommit_maxsz;
 	return 0;
 }
+
+static void
+encode_layoutreturn(struct xdr_stream *xdr,
+		    const struct nfs4_layoutreturn_args *args,
+		    struct compound_hdr *hdr)
+{
+	nfs4_stateid stateid;
+	__be32 *p;
+
+	p = reserve_space(xdr, 20);
+	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
+	*p++ = cpu_to_be32(args->reclaim);
+	*p++ = cpu_to_be32(args->layout_type);
+	*p++ = cpu_to_be32(args->range.iomode);
+	*p = cpu_to_be32(args->return_type);
+	if (args->return_type == RETURN_FILE) {
+		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
+		p = xdr_encode_hyper(p, args->range.offset);
+		p = xdr_encode_hyper(p, args->range.length);
+		spin_lock(&args->inode->i_lock);
+		memcpy(stateid.data, NFS_I(args->inode)->layout->plh_stateid.data,
+		       NFS4_STATEID_SIZE);
+		spin_unlock(&args->inode->i_lock);
+		p = xdr_encode_opaque_fixed(p, &stateid.data,
+					    NFS4_STATEID_SIZE);
+		p = reserve_space(xdr, 4);
+		*p = cpu_to_be32(0);
+	}
+	hdr->nops++;
+	hdr->replen += decode_layoutreturn_maxsz;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2707,9 +2750,9 @@  static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
 /*
  *  Encode LAYOUTCOMMIT request
  */
-static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
-				     struct xdr_stream *xdr,
-				     struct nfs4_layoutcommit_args *args)
+static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_layoutcommit_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2721,7 +2764,24 @@  static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
 	encode_layoutcommit(xdr, args, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
+}
+
+/*
+ * Encode LAYOUTRETURN request
+ */
+static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_layoutreturn_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutreturn(xdr, args, &hdr);
+	encode_nops(&hdr);
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -5202,6 +5262,27 @@  out_overflow:
 	return -EIO;
 }
 
+static int decode_layoutreturn(struct xdr_stream *xdr,
+			       struct nfs4_layoutreturn_res *res)
+{
+	__be32 *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
+	if (status)
+		return status;
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->lrs_present = be32_to_cpup(p);
+	if (res->lrs_present)
+		status = decode_stateid(xdr, &res->stateid);
+	return status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 static int decode_layoutcommit(struct xdr_stream *xdr,
 			       struct rpc_rqst *req,
 			       struct nfs4_layoutcommit_res *res)
@@ -6319,6 +6400,30 @@  out:
 }
 
 /*
+ * Decode LAYOUTRETURN response
+ */
+static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs4_layoutreturn_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_layoutreturn(xdr, res);
+out:
+	return status;
+}
+
+/*
  * Decode LAYOUTCOMMIT response
  */
 static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
@@ -6544,6 +6649,7 @@  struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
 	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
 	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
+	PROC(LAYOUTRETURN,	enc_layoutreturn,	dec_layoutreturn),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9ab972..89e7725 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -321,6 +321,36 @@  mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	return invalid - removed;
 }
 
+/* Returns false if there was nothing to do, true otherwise */
+static bool
+pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
+		     struct pnfs_layout_range *range)
+{
+	struct pnfs_layout_segment *lseg, *next;
+	bool rv = false;
+
+	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
+		__func__, lo, range->offset, range->length, range->iomode);
+	assert_spin_locked(&lo->plh_inode->i_lock);
+	if (list_empty(&lo->plh_segs)) {
+		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
+			put_layout_hdr_locked(lo);
+		return 0;
+	}
+	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+		if (should_free_lseg(lseg->pls_range.iomode, range->iomode)) {
+			dprintk("%s: freeing lseg %p iomode %d "
+				"offset %llu length %llu\n", __func__,
+				lseg, lseg->pls_range.iomode,
+				lseg->pls_range.offset,
+				lseg->pls_range.length);
+			mark_lseg_invalid(lseg, tmp_list);
+			rv = true;
+		}
+	dprintk("%s:Return %d\n", __func__, rv);
+	return rv;
+}
+
 /* note free_me must contain lsegs from a single layout_hdr */
 void
 pnfs_free_lseg_list(struct list_head *free_me)
@@ -539,6 +569,72 @@  out_err_free:
 	return NULL;
 }
 
+static int
+return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait)
+{
+	struct nfs4_layoutreturn *lrp;
+	struct nfs_server *server = NFS_SERVER(ino);
+	int status = -ENOMEM;
+
+	dprintk("--> %s\n", __func__);
+
+	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+	if (lrp == NULL) {
+		put_layout_hdr(NFS_I(ino)->layout);
+		goto out;
+	}
+	lrp->args.reclaim = 0;
+	lrp->args.layout_type = server->pnfs_curr_ld->id;
+	lrp->args.return_type = RETURN_FILE;
+	lrp->args.range = *range;
+	lrp->args.inode = ino;
+	lrp->clp = server->nfs_client;
+
+	status = nfs4_proc_layoutreturn(lrp, wait);
+out:
+	dprintk("<-- %s status: %d\n", __func__, status);
+	return status;
+}
+
+/* Initiates a LAYOUTRETURN(FILE) */
+int
+_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait)
+{
+	struct pnfs_layout_hdr *lo = NULL;
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_range arg;
+	LIST_HEAD(tmp_list);
+	int status = 0;
+
+	dprintk("--> %s\n", __func__);
+
+	arg.iomode = range ? range->iomode : IOMODE_ANY;
+	arg.offset = 0;
+	arg.length = NFS4_MAX_UINT64;
+
+	spin_lock(&ino->i_lock);
+	lo = nfsi->layout;
+	if (!lo || !pnfs_clear_lseg_list(lo, &tmp_list, &arg)) {
+		spin_unlock(&ino->i_lock);
+		dprintk("%s: no layout segments to return\n", __func__);
+		goto out;
+	}
+	/* Reference matched in nfs4_layoutreturn_release */
+	get_layout_hdr(lo);
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&tmp_list);
+
+	/* Return layout even if layoutcommit fails */
+	status = pnfs_layoutcommit_inode(ino, wait);
+	if (status)
+		dprintk("%s: layoutcommit failed, status=%d. Returning layout anyway\n",
+			__func__, status);
+	status = return_layout(ino, &arg, wait);
+out:
+	dprintk("<-- %s status: %d\n", __func__, status);
+	return status;
+}
+
 bool pnfs_roc(struct inode *ino)
 {
 	struct pnfs_layout_hdr *lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4cb0a0d..a308f3c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -123,6 +123,7 @@  extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
 extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 				   struct pnfs_device *dev);
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
 
 /* pnfs.c */
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -158,6 +159,7 @@  void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
 void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
+int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *, bool wait);
 
 static inline int lo_fail_bit(u32 iomode)
 {
@@ -226,6 +228,19 @@  static inline void pnfs_clear_request_commit(struct nfs_page *req)
 		put_lseg(req->wb_commit_lseg);
 }
 
+static inline int pnfs_return_layout(struct inode *ino,
+				     struct pnfs_layout_range *range,
+				     bool wait)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct nfs_server *nfss = NFS_SERVER(ino);
+
+	if (pnfs_enabled_sb(nfss) && nfsi->layout)
+		return _pnfs_return_layout(ino, range, wait);
+
+	return 0;
+}
+
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -267,6 +282,13 @@  pnfs_try_to_write_data(struct nfs_write_data *data,
 	return PNFS_NOT_ATTEMPTED;
 }
 
+static inline int pnfs_return_layout(struct inode *ino,
+				     struct pnfs_layout_range *range,
+				     bool wait)
+{
+	return 0;
+}
+
 static inline bool
 pnfs_roc(struct inode *ino)
 {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 178fafe..9376eaf 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -562,6 +562,7 @@  enum {
 	NFSPROC4_CLNT_LAYOUTGET,
 	NFSPROC4_CLNT_GETDEVICEINFO,
 	NFSPROC4_CLNT_LAYOUTCOMMIT,
+	NFSPROC4_CLNT_LAYOUTRETURN,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 78b101e..455ddfb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -266,6 +266,29 @@  struct nfs4_layoutcommit_data {
 	struct nfs4_layoutcommit_res res;
 };
 
+struct nfs4_layoutreturn_args {
+	__u32   reclaim;
+	__u32   layout_type;
+	__u32   return_type;
+	struct pnfs_layout_range range;
+	struct inode *inode;
+	struct nfs4_sequence_args seq_args;
+};
+
+struct nfs4_layoutreturn_res {
+	struct nfs4_sequence_res seq_res;
+	u32 lrs_present;
+	nfs4_stateid stateid;
+};
+
+struct nfs4_layoutreturn {
+	struct nfs4_layoutreturn_args args;
+	struct nfs4_layoutreturn_res res;
+	struct rpc_cred *cred;
+	struct nfs_client *clp;
+	int rpc_status;
+};
+
 /*
  * Arguments to the open call.
  */