diff mbox series

[v4,4/8] NFSD add COPY_NOTIFY operation

Message ID 20190708192352.12614-5-olga.kornievskaia@gmail.com (mailing list archive)
State New, archived
Headers show
Series server-side support for "inter" SSC copy | expand

Commit Message

Olga Kornievskaia July 8, 2019, 7:23 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

Introducing the COPY_NOTIFY operation.

Create a new unique stateid that will keep track of the copy
state and the upcoming READs that will use that stateid. Keep
it in the list associated with parent stateid.

Return single netaddr to advertise to the copy.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c  | 71 +++++++++++++++++++++++++++++++++++----
 fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++++++----
 fs/nfsd/nfs4xdr.c   | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/nfsd/state.h     | 18 ++++++++--
 fs/nfsd/xdr4.h      | 13 +++++++
 5 files changed, 247 insertions(+), 16 deletions(-)

Comments

Anna Schumaker July 9, 2019, 12:34 p.m. UTC | #1
Hi Olga,

On Mon, 2019-07-08 at 15:23 -0400, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> Introducing the COPY_NOTIFY operation.
> 
> Create a new unique stateid that will keep track of the copy
> state and the upcoming READs that will use that stateid. Keep
> it in the list associated with parent stateid.
> 
> Return single netaddr to advertise to the copy.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/nfs4proc.c  | 71 +++++++++++++++++++++++++++++++++++----
>  fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++++++----
>  fs/nfsd/nfs4xdr.c   | 97
> +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/nfsd/state.h     | 18 ++++++++--
>  fs/nfsd/xdr4.h      | 13 +++++++
>  5 files changed, 247 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index cfd8767..c39fa72 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -37,6 +37,7 @@
>  #include <linux/falloc.h>
>  #include <linux/slab.h>
>  #include <linux/kthread.h>
> +#include <linux/sunrpc/addr.h>
>  
>  #include "idmap.h"
>  #include "cache.h"
> @@ -1033,7 +1034,8 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst
> *rqstp, struct svc_fh *fh)
>  static __be32
>  nfsd4_verify_copy(struct svc_rqst *rqstp, struct
> nfsd4_compound_state *cstate,
>  		  stateid_t *src_stateid, struct file **src,
> -		  stateid_t *dst_stateid, struct file **dst)
> +		  stateid_t *dst_stateid, struct file **dst,
> +		  struct nfs4_stid **stid)
>  {
>  	__be32 status;
>  
> @@ -1050,7 +1052,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst
> *rqstp, struct svc_fh *fh)
>  
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate-
> >current_fh,
>  					    dst_stateid, WR_STATE, dst,
> NULL,
> -					    NULL);
> +					    stid);
>  	if (status) {
>  		dprintk("NFSD: %s: couldn't process dst stateid!\n",
> __func__);
>  		goto out_put_src;
> @@ -1081,7 +1083,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst
> *rqstp, struct svc_fh *fh)
>  	__be32 status;
>  
>  	status = nfsd4_verify_copy(rqstp, cstate, &clone-
> >cl_src_stateid, &src,
> -				   &clone->cl_dst_stateid, &dst);
> +				   &clone->cl_dst_stateid, &dst, NULL);
>  	if (status)
>  		goto out;
>  
> @@ -1228,7 +1230,7 @@ static void dup_copy_fields(struct nfsd4_copy
> *src, struct nfsd4_copy *dst)
>  
>  static void cleanup_async_copy(struct nfsd4_copy *copy)
>  {
> -	nfs4_free_cp_state(copy);
> +	nfs4_free_copy_state(copy);
>  	fput(copy->file_dst);
>  	fput(copy->file_src);
>  	spin_lock(&copy->cp_clp->async_lock);
> @@ -1268,7 +1270,7 @@ static int nfsd4_do_async_copy(void *data)
>  
>  	status = nfsd4_verify_copy(rqstp, cstate, &copy-
> >cp_src_stateid,
>  				   &copy->file_src, &copy-
> >cp_dst_stateid,
> -				   &copy->file_dst);
> +				   &copy->file_dst, NULL);
>  	if (status)
>  		goto out;
>  
> @@ -1282,7 +1284,7 @@ static int nfsd4_do_async_copy(void *data)
>  		async_copy = kzalloc(sizeof(struct nfsd4_copy),
> GFP_KERNEL);
>  		if (!async_copy)
>  			goto out;
> -		if (!nfs4_init_cp_state(nn, copy)) {
> +		if (!nfs4_init_copy_state(nn, copy)) {
>  			kfree(async_copy);
>  			goto out;
>  		}
> @@ -1346,6 +1348,42 @@ struct nfsd4_copy *
>  }
>  
>  static __be32
> +nfsd4_copy_notify(struct svc_rqst *rqstp, struct
> nfsd4_compound_state *cstate,
> +		  union nfsd4_op_u *u)
> +{
> +	struct nfsd4_copy_notify *cn = &u->copy_notify;
> +	__be32 status;
> +	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +	struct nfs4_stid *stid;
> +	struct nfs4_cpntf_state *cps;
> +
> +	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate-
> >current_fh,
> +					&cn->cpn_src_stateid, RD_STATE,
> NULL,
> +					NULL, &stid);
> +	if (status)
> +		return status;
> +
> +	cn->cpn_sec = nn->nfsd4_lease;
> +	cn->cpn_nsec = 0;
> +
> +	status = nfserrno(-ENOMEM);
> +	cps = nfs4_alloc_init_cpntf_state(nn, stid);
> +	if (!cps)
> +		return status;
> +	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid,
> sizeof(stateid_t));
> +
> +	/* For now, only return one server address in cpn_src, the
> +	 * address used by the client to connect to this server.
> +	 */
> +	cn->cpn_src.nl4_type = NL4_NETADDR;
> +	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
> +				 &cn->cpn_src.u.nl4_addr);
> +	WARN_ON_ONCE(status);
> +
> +	return status;
> +}
> +
> +static __be32
>  nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state
> *cstate,
>  		struct nfsd4_fallocate *fallocate, int flags)
>  {
> @@ -2298,6 +2336,21 @@ static inline u32
> nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
>  		1 /* osr_complete<1> optional 0 for now */) *
> sizeof(__be32);
>  }
>  
> +static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
> +					struct nfsd4_op *op)
> +{
> +	return (op_encode_hdr_size +
> +		3 /* cnr_lease_time */ +
> +		1 /* We support one cnr_source_server */ +
> +		1 /* cnr_stateid seq */ +
> +		op_encode_stateid_maxsz /* cnr_stateid */ +
> +		1 /* num cnr_source_server*/ +
> +		1 /* nl4_type */ +
> +		1 /* nl4 size */ +
> +		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz
> */)
> +		* sizeof(__be32);
> +}
> +
>  #ifdef CONFIG_NFSD_PNFS
>  static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp,
> struct nfsd4_op *op)
>  {
> @@ -2722,6 +2775,12 @@ static inline u32 nfsd4_seek_rsize(struct
> svc_rqst *rqstp, struct nfsd4_op *op)
>  		.op_name = "OP_OFFLOAD_CANCEL",
>  		.op_rsize_bop = nfsd4_only_status_rsize,
>  	},
> +	[OP_COPY_NOTIFY] = {
> +		.op_func = nfsd4_copy_notify,
> +		.op_flags = OP_MODIFIES_SOMETHING,
> +		.op_name = "OP_COPY_NOTIFY",
> +		.op_rsize_bop = nfsd4_copy_notify_rsize,
> +	},
>  };
>  
>  /**
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 05c0295..2555eb9 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -707,6 +707,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct
> nfs4_client *cl, struct kmem_cache *sla
>  	/* Will be incremented before return to client: */
>  	refcount_set(&stid->sc_count, 1);
>  	spin_lock_init(&stid->sc_lock);
> +	INIT_LIST_HEAD(&stid->sc_cp_list);
>  
>  	/*
>  	 * It shouldn't be a problem to reuse an opaque stateid value.
> @@ -726,24 +727,53 @@ struct nfs4_stid *nfs4_alloc_stid(struct
> nfs4_client *cl, struct kmem_cache *sla
>  /*
>   * Create a unique stateid_t to represent each COPY.
>   */
> -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> +static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr,
> stateid_t *stid)
>  {
>  	int new_id;
>  
>  	idr_preload(GFP_KERNEL);
>  	spin_lock(&nn->s2s_cp_lock);
> -	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0,
> GFP_NOWAIT);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0,
> GFP_NOWAIT);
>  	spin_unlock(&nn->s2s_cp_lock);
>  	idr_preload_end();
>  	if (new_id < 0)
>  		return 0;
> -	copy->cp_stateid.si_opaque.so_id = new_id;
> -	copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> -	copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	stid->si_opaque.so_id = new_id;
> +	stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> +	stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
>  	return 1;
>  }
>  
> -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy
> *copy)
> +{
> +	return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
> +}
> +
> +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net
> *nn,
> +						     struct nfs4_stid
> *p_stid)
> +{
> +	struct nfs4_cpntf_state *cps;
> +
> +	cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
> +	if (!cps)
> +		return NULL;
> +	if (!nfs4_init_cp_state(nn, cps, &cps->cp_stateid))
> +		goto out_free;
> +	cps->cp_p_stid = p_stid;
> +	cps->cp_active = false;
> +	cps->cp_timeout = jiffies + (nn->nfsd4_lease * HZ);
> +	INIT_LIST_HEAD(&cps->cp_list);
> +	spin_lock(&nn->s2s_cp_lock);
> +	list_add(&cps->cp_list, &p_stid->sc_cp_list);
> +	spin_unlock(&nn->s2s_cp_lock);
> +
> +	return cps;
> +out_free:
> +	kfree(cps);
> +	return NULL;
> +}
> +
> +void nfs4_free_copy_state(struct nfsd4_copy *copy)
>  {
>  	struct nfsd_net *nn;
>  
> @@ -753,6 +783,27 @@ void nfs4_free_cp_state(struct nfsd4_copy *copy)
>  	spin_unlock(&nn->s2s_cp_lock);
>  }
>  
> +static void nfs4_free_cpntf_statelist(struct net *net, struct
> nfs4_stid *stid)
> +{
> +	struct nfs4_cpntf_state *cps;
> +	struct nfsd_net *nn;
> +
> +	nn = net_generic(net, nfsd_net_id);
> +
> +	might_sleep();
> +
> +	spin_lock(&nn->s2s_cp_lock);
> +	while (!list_empty(&stid->sc_cp_list)) {
> +		cps = list_first_entry(&stid->sc_cp_list,
> +				       struct nfs4_cpntf_state,
> cp_list);
> +		list_del(&cps->cp_list);
> +		idr_remove(&nn->s2s_cp_stateids,
> +			   cps->cp_stateid.si_opaque.so_id);
> +		kfree(cps);
> +	}
> +	spin_unlock(&nn->s2s_cp_lock);
> +}
> +
>  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct
> nfs4_client *clp)
>  {
>  	struct nfs4_stid *stid;
> @@ -901,6 +952,7 @@ static void block_delegations(struct knfsd_fh
> *fh)
>  	}
>  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>  	spin_unlock(&clp->cl_lock);
> +	nfs4_free_cpntf_statelist(clp->net, s);
>  	s->sc_free(s);
>  	if (fp)
>  		put_nfs4_file(fp);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 15f53bb..ed37528 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1847,6 +1847,22 @@ static __be32 nfsd4_decode_nl4_server(struct
> nfsd4_compoundargs *argp,
>  }
>  
>  static __be32
> +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
> +			 struct nfsd4_copy_notify *cn)
> +{
> +	int status;
> +
> +	status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
> +	if (status)
> +		return status;
> +	status = nfsd4_decode_nl4_server(argp, &cn->cpn_dst);

Maybe this could be simplified to "return nfsd4_decode_nl4_server()" ?

> +	if (status)
> +		return status;
> +
> +	return status;
> +}
> +
> +static __be32
>  nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek
> *seek)
>  {
>  	DECODE_HEAD;
> @@ -1947,7 +1963,7 @@ static __be32 nfsd4_decode_nl4_server(struct
> nfsd4_compoundargs *argp,
>  	/* new operations for NFSv4.2 */
>  	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
>  	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
> -	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
> +	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
>  	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
>  	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
>  	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
> @@ -4336,6 +4352,45 @@ static __be32 nfsd4_encode_readv(struct
> nfsd4_compoundres *resp,
>  }
>  
>  static __be32
> +nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct
> nl4_server *ns)
> +{
> +	struct xdr_stream *xdr = &resp->xdr;
> +	struct nfs42_netaddr *addr;
> +	__be32 *p;
> +
> +	p = xdr_reserve_space(xdr, 4);
> +	*p++ = cpu_to_be32(ns->nl4_type);
> +
> +	switch (ns->nl4_type) {
> +	case NL4_NETADDR:
> +		addr = &ns->u.nl4_addr;
> +
> +		/* netid_len, netid, uaddr_len, uaddr (port included
> +		 * in RPCBIND_MAXUADDRLEN)
> +		 */
> +		p = xdr_reserve_space(xdr,
> +			4 /* netid len */ +
> +			(XDR_QUADLEN(addr->netid_len) * 4) +
> +			4 /* uaddr len */ +
> +			(XDR_QUADLEN(addr->addr_len) * 4));
> +		if (!p)
> +			return nfserr_resource;
> +
> +		*p++ = cpu_to_be32(addr->netid_len);
> +		p = xdr_encode_opaque_fixed(p, addr->netid,
> +					    addr->netid_len);
> +		*p++ = cpu_to_be32(addr->addr_len);
> +		p = xdr_encode_opaque_fixed(p, addr->addr,
> +					addr->addr_len);
> +		break;
> +	default:
> +		WARN_ON(ns->nl4_type != NL4_NETADDR);
> +	}
> +
> +	return 0;
> +}
> +
> +static __be32
>  nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
>  		  struct nfsd4_copy *copy)
>  {
> @@ -4369,6 +4424,44 @@ static __be32 nfsd4_encode_readv(struct
> nfsd4_compoundres *resp,
>  }
>  
>  static __be32
> +nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32
> nfserr,
> +			 struct nfsd4_copy_notify *cn)
> +{
> +	struct xdr_stream *xdr = &resp->xdr;
> +	__be32 *p;
> +
> +	if (nfserr)
> +		return nfserr;
> +
> +	/* 8 sec, 4 nsec */
> +	p = xdr_reserve_space(xdr, 12);
> +	if (!p)
> +		return nfserr_resource;
> +
> +	/* cnr_lease_time */
> +	p = xdr_encode_hyper(p, cn->cpn_sec);
> +	*p++ = cpu_to_be32(cn->cpn_nsec);
> +
> +	/* cnr_stateid */
> +	nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
> +	if (nfserr)
> +		return nfserr;
> +
> +	/* cnr_src.nl_nsvr */
> +	p = xdr_reserve_space(xdr, 4);
> +	if (!p)
> +		return nfserr_resource;
> +
> +	*p++ = cpu_to_be32(1);
> +
> +	nfserr = nfsd42_encode_nl4_server(resp, &cn->cpn_src);

This could be simplified, too: "return nfsd42_encode_nl4_server()" 

Thanks,
Anna

> +	if (nfserr)
> +		return nfserr;
> +
> +	return nfserr;
> +}
> +
> +static __be32
>  nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
>  		  struct nfsd4_seek *seek)
>  {
> @@ -4465,7 +4558,7 @@ static __be32 nfsd4_encode_readv(struct
> nfsd4_compoundres *resp,
>  	/* NFSv4.2 operations */
>  	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
>  	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
> -	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
> +	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
>  	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
>  	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
>  	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 5da9cc3..106ed56 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -95,6 +95,7 @@ struct nfs4_stid {
>  #define NFS4_REVOKED_DELEG_STID 16
>  #define NFS4_CLOSED_DELEG_STID 32
>  #define NFS4_LAYOUT_STID 64
> +	struct list_head	sc_cp_list;
>  	unsigned char		sc_type;
>  	stateid_t		sc_stateid;
>  	spinlock_t		sc_lock;
> @@ -103,6 +104,17 @@ struct nfs4_stid {
>  	void			(*sc_free)(struct nfs4_stid *);
>  };
>  
> +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it
> with the
> + * parent OPEN/LOCK/DELEG stateid.
> + */
> +struct nfs4_cpntf_state {
> +	stateid_t		cp_stateid;
> +	struct list_head	cp_list;	/* per parent nfs4_stid */
> +	struct nfs4_stid	*cp_p_stid;	/* pointer to parent */
> +	bool			cp_active;	/* has the copy
> started */
> +	unsigned long		cp_timeout;	/* copy timeout */
> +};
> +
>  /*
>   * Represents a delegation stateid. The nfs4_client holds references
> to these
>   * and they are put when it is being destroyed or when the
> delegation is
> @@ -614,8 +626,10 @@ __be32 nfsd4_lookup_stateid(struct
> nfsd4_compound_state *cstate,
>  		     struct nfs4_stid **s, struct nfsd_net *nn);
>  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
> kmem_cache *slab,
>  				  void (*sc_free)(struct nfs4_stid *));
> -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy
> *copy);
> -void nfs4_free_cp_state(struct nfsd4_copy *copy);
> +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy
> *copy);
> +void nfs4_free_copy_state(struct nfsd4_copy *copy);
> +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net
> *nn,
> +			struct nfs4_stid *p_stid);
>  void nfs4_unhash_stid(struct nfs4_stid *s);
>  void nfs4_put_stid(struct nfs4_stid *s);
>  void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid
> *stid);
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 513c9ff..bade8e5 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -568,6 +568,18 @@ struct nfsd4_offload_status {
>  	u32		status;
>  };
>  
> +struct nfsd4_copy_notify {
> +	/* request */
> +	stateid_t		cpn_src_stateid;
> +	struct nl4_server	cpn_dst;
> +
> +	/* response */
> +	stateid_t		cpn_cnr_stateid;
> +	u64			cpn_sec;
> +	u32			cpn_nsec;
> +	struct nl4_server	cpn_src;
> +};
> +
>  struct nfsd4_op {
>  	int					opnum;
>  	const struct nfsd4_operation *		opdesc;
> @@ -627,6 +639,7 @@ struct nfsd4_op {
>  		struct nfsd4_clone		clone;
>  		struct nfsd4_copy		copy;
>  		struct nfsd4_offload_status	offload_status;
> +		struct nfsd4_copy_notify	copy_notify;
>  		struct nfsd4_seek		seek;
>  	} u;
>  	struct nfs4_replay *			replay;
Olga Kornievskaia July 9, 2019, 3:51 p.m. UTC | #2
On Tue, Jul 9, 2019 at 8:34 AM Anna Schumaker <schumaker.anna@gmail.com> wrote:
>
> Hi Olga,
>
> On Mon, 2019-07-08 at 15:23 -0400, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <kolga@netapp.com>
> >
> > Introducing the COPY_NOTIFY operation.
> >
> > Create a new unique stateid that will keep track of the copy
> > state and the upcoming READs that will use that stateid. Keep
> > it in the list associated with parent stateid.
> >
> > Return single netaddr to advertise to the copy.
> >
> > Signed-off-by: Andy Adamson <andros@netapp.com>
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >  fs/nfsd/nfs4proc.c  | 71 +++++++++++++++++++++++++++++++++++----
> >  fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++++++----
> >  fs/nfsd/nfs4xdr.c   | 97
> > +++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  fs/nfsd/state.h     | 18 ++++++++--
> >  fs/nfsd/xdr4.h      | 13 +++++++
> >  5 files changed, 247 insertions(+), 16 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index cfd8767..c39fa72 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -37,6 +37,7 @@
> >  #include <linux/falloc.h>
> >  #include <linux/slab.h>
> >  #include <linux/kthread.h>
> > +#include <linux/sunrpc/addr.h>
> >
> >  #include "idmap.h"
> >  #include "cache.h"
> > @@ -1033,7 +1034,8 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst
> > *rqstp, struct svc_fh *fh)
> >  static __be32
> >  nfsd4_verify_copy(struct svc_rqst *rqstp, struct
> > nfsd4_compound_state *cstate,
> >                 stateid_t *src_stateid, struct file **src,
> > -               stateid_t *dst_stateid, struct file **dst)
> > +               stateid_t *dst_stateid, struct file **dst,
> > +               struct nfs4_stid **stid)
> >  {
> >       __be32 status;
> >
> > @@ -1050,7 +1052,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst
> > *rqstp, struct svc_fh *fh)
> >
> >       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate-
> > >current_fh,
> >                                           dst_stateid, WR_STATE, dst,
> > NULL,
> > -                                         NULL);
> > +                                         stid);
> >       if (status) {
> >               dprintk("NFSD: %s: couldn't process dst stateid!\n",
> > __func__);
> >               goto out_put_src;
> > @@ -1081,7 +1083,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst
> > *rqstp, struct svc_fh *fh)
> >       __be32 status;
> >
> >       status = nfsd4_verify_copy(rqstp, cstate, &clone-
> > >cl_src_stateid, &src,
> > -                                &clone->cl_dst_stateid, &dst);
> > +                                &clone->cl_dst_stateid, &dst, NULL);
> >       if (status)
> >               goto out;
> >
> > @@ -1228,7 +1230,7 @@ static void dup_copy_fields(struct nfsd4_copy
> > *src, struct nfsd4_copy *dst)
> >
> >  static void cleanup_async_copy(struct nfsd4_copy *copy)
> >  {
> > -     nfs4_free_cp_state(copy);
> > +     nfs4_free_copy_state(copy);
> >       fput(copy->file_dst);
> >       fput(copy->file_src);
> >       spin_lock(&copy->cp_clp->async_lock);
> > @@ -1268,7 +1270,7 @@ static int nfsd4_do_async_copy(void *data)
> >
> >       status = nfsd4_verify_copy(rqstp, cstate, &copy-
> > >cp_src_stateid,
> >                                  &copy->file_src, &copy-
> > >cp_dst_stateid,
> > -                                &copy->file_dst);
> > +                                &copy->file_dst, NULL);
> >       if (status)
> >               goto out;
> >
> > @@ -1282,7 +1284,7 @@ static int nfsd4_do_async_copy(void *data)
> >               async_copy = kzalloc(sizeof(struct nfsd4_copy),
> > GFP_KERNEL);
> >               if (!async_copy)
> >                       goto out;
> > -             if (!nfs4_init_cp_state(nn, copy)) {
> > +             if (!nfs4_init_copy_state(nn, copy)) {
> >                       kfree(async_copy);
> >                       goto out;
> >               }
> > @@ -1346,6 +1348,42 @@ struct nfsd4_copy *
> >  }
> >
> >  static __be32
> > +nfsd4_copy_notify(struct svc_rqst *rqstp, struct
> > nfsd4_compound_state *cstate,
> > +               union nfsd4_op_u *u)
> > +{
> > +     struct nfsd4_copy_notify *cn = &u->copy_notify;
> > +     __be32 status;
> > +     struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> > +     struct nfs4_stid *stid;
> > +     struct nfs4_cpntf_state *cps;
> > +
> > +     status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate-
> > >current_fh,
> > +                                     &cn->cpn_src_stateid, RD_STATE,
> > NULL,
> > +                                     NULL, &stid);
> > +     if (status)
> > +             return status;
> > +
> > +     cn->cpn_sec = nn->nfsd4_lease;
> > +     cn->cpn_nsec = 0;
> > +
> > +     status = nfserrno(-ENOMEM);
> > +     cps = nfs4_alloc_init_cpntf_state(nn, stid);
> > +     if (!cps)
> > +             return status;
> > +     memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid,
> > sizeof(stateid_t));
> > +
> > +     /* For now, only return one server address in cpn_src, the
> > +      * address used by the client to connect to this server.
> > +      */
> > +     cn->cpn_src.nl4_type = NL4_NETADDR;
> > +     status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
> > +                              &cn->cpn_src.u.nl4_addr);
> > +     WARN_ON_ONCE(status);
> > +
> > +     return status;
> > +}
> > +
> > +static __be32
> >  nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state
> > *cstate,
> >               struct nfsd4_fallocate *fallocate, int flags)
> >  {
> > @@ -2298,6 +2336,21 @@ static inline u32
> > nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
> >               1 /* osr_complete<1> optional 0 for now */) *
> > sizeof(__be32);
> >  }
> >
> > +static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
> > +                                     struct nfsd4_op *op)
> > +{
> > +     return (op_encode_hdr_size +
> > +             3 /* cnr_lease_time */ +
> > +             1 /* We support one cnr_source_server */ +
> > +             1 /* cnr_stateid seq */ +
> > +             op_encode_stateid_maxsz /* cnr_stateid */ +
> > +             1 /* num cnr_source_server*/ +
> > +             1 /* nl4_type */ +
> > +             1 /* nl4 size */ +
> > +             XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz
> > */)
> > +             * sizeof(__be32);
> > +}
> > +
> >  #ifdef CONFIG_NFSD_PNFS
> >  static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp,
> > struct nfsd4_op *op)
> >  {
> > @@ -2722,6 +2775,12 @@ static inline u32 nfsd4_seek_rsize(struct
> > svc_rqst *rqstp, struct nfsd4_op *op)
> >               .op_name = "OP_OFFLOAD_CANCEL",
> >               .op_rsize_bop = nfsd4_only_status_rsize,
> >       },
> > +     [OP_COPY_NOTIFY] = {
> > +             .op_func = nfsd4_copy_notify,
> > +             .op_flags = OP_MODIFIES_SOMETHING,
> > +             .op_name = "OP_COPY_NOTIFY",
> > +             .op_rsize_bop = nfsd4_copy_notify_rsize,
> > +     },
> >  };
> >
> >  /**
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 05c0295..2555eb9 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -707,6 +707,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct
> > nfs4_client *cl, struct kmem_cache *sla
> >       /* Will be incremented before return to client: */
> >       refcount_set(&stid->sc_count, 1);
> >       spin_lock_init(&stid->sc_lock);
> > +     INIT_LIST_HEAD(&stid->sc_cp_list);
> >
> >       /*
> >        * It shouldn't be a problem to reuse an opaque stateid value.
> > @@ -726,24 +727,53 @@ struct nfs4_stid *nfs4_alloc_stid(struct
> > nfs4_client *cl, struct kmem_cache *sla
> >  /*
> >   * Create a unique stateid_t to represent each COPY.
> >   */
> > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > +static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr,
> > stateid_t *stid)
> >  {
> >       int new_id;
> >
> >       idr_preload(GFP_KERNEL);
> >       spin_lock(&nn->s2s_cp_lock);
> > -     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0,
> > GFP_NOWAIT);
> > +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0,
> > GFP_NOWAIT);
> >       spin_unlock(&nn->s2s_cp_lock);
> >       idr_preload_end();
> >       if (new_id < 0)
> >               return 0;
> > -     copy->cp_stateid.si_opaque.so_id = new_id;
> > -     copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > -     copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > +     stid->si_opaque.so_id = new_id;
> > +     stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> > +     stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> >       return 1;
> >  }
> >
> > -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy
> > *copy)
> > +{
> > +     return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
> > +}
> > +
> > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net
> > *nn,
> > +                                                  struct nfs4_stid
> > *p_stid)
> > +{
> > +     struct nfs4_cpntf_state *cps;
> > +
> > +     cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
> > +     if (!cps)
> > +             return NULL;
> > +     if (!nfs4_init_cp_state(nn, cps, &cps->cp_stateid))
> > +             goto out_free;
> > +     cps->cp_p_stid = p_stid;
> > +     cps->cp_active = false;
> > +     cps->cp_timeout = jiffies + (nn->nfsd4_lease * HZ);
> > +     INIT_LIST_HEAD(&cps->cp_list);
> > +     spin_lock(&nn->s2s_cp_lock);
> > +     list_add(&cps->cp_list, &p_stid->sc_cp_list);
> > +     spin_unlock(&nn->s2s_cp_lock);
> > +
> > +     return cps;
> > +out_free:
> > +     kfree(cps);
> > +     return NULL;
> > +}
> > +
> > +void nfs4_free_copy_state(struct nfsd4_copy *copy)
> >  {
> >       struct nfsd_net *nn;
> >
> > @@ -753,6 +783,27 @@ void nfs4_free_cp_state(struct nfsd4_copy *copy)
> >       spin_unlock(&nn->s2s_cp_lock);
> >  }
> >
> > +static void nfs4_free_cpntf_statelist(struct net *net, struct
> > nfs4_stid *stid)
> > +{
> > +     struct nfs4_cpntf_state *cps;
> > +     struct nfsd_net *nn;
> > +
> > +     nn = net_generic(net, nfsd_net_id);
> > +
> > +     might_sleep();
> > +
> > +     spin_lock(&nn->s2s_cp_lock);
> > +     while (!list_empty(&stid->sc_cp_list)) {
> > +             cps = list_first_entry(&stid->sc_cp_list,
> > +                                    struct nfs4_cpntf_state,
> > cp_list);
> > +             list_del(&cps->cp_list);
> > +             idr_remove(&nn->s2s_cp_stateids,
> > +                        cps->cp_stateid.si_opaque.so_id);
> > +             kfree(cps);
> > +     }
> > +     spin_unlock(&nn->s2s_cp_lock);
> > +}
> > +
> >  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct
> > nfs4_client *clp)
> >  {
> >       struct nfs4_stid *stid;
> > @@ -901,6 +952,7 @@ static void block_delegations(struct knfsd_fh
> > *fh)
> >       }
> >       idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> >       spin_unlock(&clp->cl_lock);
> > +     nfs4_free_cpntf_statelist(clp->net, s);
> >       s->sc_free(s);
> >       if (fp)
> >               put_nfs4_file(fp);
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 15f53bb..ed37528 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1847,6 +1847,22 @@ static __be32 nfsd4_decode_nl4_server(struct
> > nfsd4_compoundargs *argp,
> >  }
> >
> >  static __be32
> > +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
> > +                      struct nfsd4_copy_notify *cn)
> > +{
> > +     int status;
> > +
> > +     status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
> > +     if (status)
> > +             return status;
> > +     status = nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
>
> Maybe this could be simplified to "return nfsd4_decode_nl4_server()" ?
>
> > +     if (status)
> > +             return status;
> > +
> > +     return status;
> > +}
> > +
> > +static __be32
> >  nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek
> > *seek)
> >  {
> >       DECODE_HEAD;
> > @@ -1947,7 +1963,7 @@ static __be32 nfsd4_decode_nl4_server(struct
> > nfsd4_compoundargs *argp,
> >       /* new operations for NFSv4.2 */
> >       [OP_ALLOCATE]           = (nfsd4_dec)nfsd4_decode_fallocate,
> >       [OP_COPY]               = (nfsd4_dec)nfsd4_decode_copy,
> > -     [OP_COPY_NOTIFY]        = (nfsd4_dec)nfsd4_decode_notsupp,
> > +     [OP_COPY_NOTIFY]        = (nfsd4_dec)nfsd4_decode_copy_notify,
> >       [OP_DEALLOCATE]         = (nfsd4_dec)nfsd4_decode_fallocate,
> >       [OP_IO_ADVISE]          = (nfsd4_dec)nfsd4_decode_notsupp,
> >       [OP_LAYOUTERROR]        = (nfsd4_dec)nfsd4_decode_notsupp,
> > @@ -4336,6 +4352,45 @@ static __be32 nfsd4_encode_readv(struct
> > nfsd4_compoundres *resp,
> >  }
> >
> >  static __be32
> > +nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct
> > nl4_server *ns)
> > +{
> > +     struct xdr_stream *xdr = &resp->xdr;
> > +     struct nfs42_netaddr *addr;
> > +     __be32 *p;
> > +
> > +     p = xdr_reserve_space(xdr, 4);
> > +     *p++ = cpu_to_be32(ns->nl4_type);
> > +
> > +     switch (ns->nl4_type) {
> > +     case NL4_NETADDR:
> > +             addr = &ns->u.nl4_addr;
> > +
> > +             /* netid_len, netid, uaddr_len, uaddr (port included
> > +              * in RPCBIND_MAXUADDRLEN)
> > +              */
> > +             p = xdr_reserve_space(xdr,
> > +                     4 /* netid len */ +
> > +                     (XDR_QUADLEN(addr->netid_len) * 4) +
> > +                     4 /* uaddr len */ +
> > +                     (XDR_QUADLEN(addr->addr_len) * 4));
> > +             if (!p)
> > +                     return nfserr_resource;
> > +
> > +             *p++ = cpu_to_be32(addr->netid_len);
> > +             p = xdr_encode_opaque_fixed(p, addr->netid,
> > +                                         addr->netid_len);
> > +             *p++ = cpu_to_be32(addr->addr_len);
> > +             p = xdr_encode_opaque_fixed(p, addr->addr,
> > +                                     addr->addr_len);
> > +             break;
> > +     default:
> > +             WARN_ON(ns->nl4_type != NL4_NETADDR);
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static __be32
> >  nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
> >                 struct nfsd4_copy *copy)
> >  {
> > @@ -4369,6 +4424,44 @@ static __be32 nfsd4_encode_readv(struct
> > nfsd4_compoundres *resp,
> >  }
> >
> >  static __be32
> > +nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32
> > nfserr,
> > +                      struct nfsd4_copy_notify *cn)
> > +{
> > +     struct xdr_stream *xdr = &resp->xdr;
> > +     __be32 *p;
> > +
> > +     if (nfserr)
> > +             return nfserr;
> > +
> > +     /* 8 sec, 4 nsec */
> > +     p = xdr_reserve_space(xdr, 12);
> > +     if (!p)
> > +             return nfserr_resource;
> > +
> > +     /* cnr_lease_time */
> > +     p = xdr_encode_hyper(p, cn->cpn_sec);
> > +     *p++ = cpu_to_be32(cn->cpn_nsec);
> > +
> > +     /* cnr_stateid */
> > +     nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
> > +     if (nfserr)
> > +             return nfserr;
> > +
> > +     /* cnr_src.nl_nsvr */
> > +     p = xdr_reserve_space(xdr, 4);
> > +     if (!p)
> > +             return nfserr_resource;
> > +
> > +     *p++ = cpu_to_be32(1);
> > +
> > +     nfserr = nfsd42_encode_nl4_server(resp, &cn->cpn_src);
>
> This could be simplified, too: "return nfsd42_encode_nl4_server()"

Yep will make the changes.
>
> Thanks,
> Anna
>
> > +     if (nfserr)
> > +             return nfserr;
> > +
> > +     return nfserr;
> > +}
> > +
> > +static __be32
> >  nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
> >                 struct nfsd4_seek *seek)
> >  {
> > @@ -4465,7 +4558,7 @@ static __be32 nfsd4_encode_readv(struct
> > nfsd4_compoundres *resp,
> >       /* NFSv4.2 operations */
> >       [OP_ALLOCATE]           = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_COPY]               = (nfsd4_enc)nfsd4_encode_copy,
> > -     [OP_COPY_NOTIFY]        = (nfsd4_enc)nfsd4_encode_noop,
> > +     [OP_COPY_NOTIFY]        = (nfsd4_enc)nfsd4_encode_copy_notify,
> >       [OP_DEALLOCATE]         = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_IO_ADVISE]          = (nfsd4_enc)nfsd4_encode_noop,
> >       [OP_LAYOUTERROR]        = (nfsd4_enc)nfsd4_encode_noop,
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 5da9cc3..106ed56 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -95,6 +95,7 @@ struct nfs4_stid {
> >  #define NFS4_REVOKED_DELEG_STID 16
> >  #define NFS4_CLOSED_DELEG_STID 32
> >  #define NFS4_LAYOUT_STID 64
> > +     struct list_head        sc_cp_list;
> >       unsigned char           sc_type;
> >       stateid_t               sc_stateid;
> >       spinlock_t              sc_lock;
> > @@ -103,6 +104,17 @@ struct nfs4_stid {
> >       void                    (*sc_free)(struct nfs4_stid *);
> >  };
> >
> > +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it
> > with the
> > + * parent OPEN/LOCK/DELEG stateid.
> > + */
> > +struct nfs4_cpntf_state {
> > +     stateid_t               cp_stateid;
> > +     struct list_head        cp_list;        /* per parent nfs4_stid */
> > +     struct nfs4_stid        *cp_p_stid;     /* pointer to parent */
> > +     bool                    cp_active;      /* has the copy
> > started */
> > +     unsigned long           cp_timeout;     /* copy timeout */
> > +};
> > +
> >  /*
> >   * Represents a delegation stateid. The nfs4_client holds references
> > to these
> >   * and they are put when it is being destroyed or when the
> > delegation is
> > @@ -614,8 +626,10 @@ __be32 nfsd4_lookup_stateid(struct
> > nfsd4_compound_state *cstate,
> >                    struct nfs4_stid **s, struct nfsd_net *nn);
> >  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
> > kmem_cache *slab,
> >                                 void (*sc_free)(struct nfs4_stid *));
> > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy
> > *copy);
> > -void nfs4_free_cp_state(struct nfsd4_copy *copy);
> > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy
> > *copy);
> > +void nfs4_free_copy_state(struct nfsd4_copy *copy);
> > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net
> > *nn,
> > +                     struct nfs4_stid *p_stid);
> >  void nfs4_unhash_stid(struct nfs4_stid *s);
> >  void nfs4_put_stid(struct nfs4_stid *s);
> >  void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid
> > *stid);
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 513c9ff..bade8e5 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -568,6 +568,18 @@ struct nfsd4_offload_status {
> >       u32             status;
> >  };
> >
> > +struct nfsd4_copy_notify {
> > +     /* request */
> > +     stateid_t               cpn_src_stateid;
> > +     struct nl4_server       cpn_dst;
> > +
> > +     /* response */
> > +     stateid_t               cpn_cnr_stateid;
> > +     u64                     cpn_sec;
> > +     u32                     cpn_nsec;
> > +     struct nl4_server       cpn_src;
> > +};
> > +
> >  struct nfsd4_op {
> >       int                                     opnum;
> >       const struct nfsd4_operation *          opdesc;
> > @@ -627,6 +639,7 @@ struct nfsd4_op {
> >               struct nfsd4_clone              clone;
> >               struct nfsd4_copy               copy;
> >               struct nfsd4_offload_status     offload_status;
> > +             struct nfsd4_copy_notify        copy_notify;
> >               struct nfsd4_seek               seek;
> >       } u;
> >       struct nfs4_replay *                    replay;
>
J. Bruce Fields July 17, 2019, 10:12 p.m. UTC | #3
On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
>  static __be32
> +nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
> +{
> +	struct xdr_stream *xdr = &resp->xdr;
> +	struct nfs42_netaddr *addr;
> +	__be32 *p;
> +
> +	p = xdr_reserve_space(xdr, 4);
> +	*p++ = cpu_to_be32(ns->nl4_type);
> +
> +	switch (ns->nl4_type) {
> +	case NL4_NETADDR:
> +		addr = &ns->u.nl4_addr;
> +
> +		/* netid_len, netid, uaddr_len, uaddr (port included
> +		 * in RPCBIND_MAXUADDRLEN)
> +		 */
> +		p = xdr_reserve_space(xdr,
> +			4 /* netid len */ +
> +			(XDR_QUADLEN(addr->netid_len) * 4) +
> +			4 /* uaddr len */ +
> +			(XDR_QUADLEN(addr->addr_len) * 4));
> +		if (!p)
> +			return nfserr_resource;
> +
> +		*p++ = cpu_to_be32(addr->netid_len);
> +		p = xdr_encode_opaque_fixed(p, addr->netid,
> +					    addr->netid_len);
> +		*p++ = cpu_to_be32(addr->addr_len);
> +		p = xdr_encode_opaque_fixed(p, addr->addr,
> +					addr->addr_len);
> +		break;
> +	default:
> +		WARN_ON(ns->nl4_type != NL4_NETADDR);

I default to WARN_ON_ONCE().

--b.
J. Bruce Fields July 17, 2019, 10:15 p.m. UTC | #4
On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> Introducing the COPY_NOTIFY operation.
> 
> Create a new unique stateid that will keep track of the copy
> state and the upcoming READs that will use that stateid. Keep
> it in the list associated with parent stateid.
> 
> Return single netaddr to advertise to the copy.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/nfs4proc.c  | 71 +++++++++++++++++++++++++++++++++++----
>  fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++++++----
>  fs/nfsd/nfs4xdr.c   | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/nfsd/state.h     | 18 ++++++++--
>  fs/nfsd/xdr4.h      | 13 +++++++
>  5 files changed, 247 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index cfd8767..c39fa72 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -37,6 +37,7 @@
>  #include <linux/falloc.h>
>  #include <linux/slab.h>
>  #include <linux/kthread.h>
> +#include <linux/sunrpc/addr.h>
>  
>  #include "idmap.h"
>  #include "cache.h"
> @@ -1033,7 +1034,8 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
>  static __be32
>  nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		  stateid_t *src_stateid, struct file **src,
> -		  stateid_t *dst_stateid, struct file **dst)
> +		  stateid_t *dst_stateid, struct file **dst,
> +		  struct nfs4_stid **stid)
>  {
>  	__be32 status;
>  
> @@ -1050,7 +1052,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
>  
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>  					    dst_stateid, WR_STATE, dst, NULL,
> -					    NULL);
> +					    stid);

Doesn't this belong with the previous patch?

--b.
J. Bruce Fields July 17, 2019, 11:07 p.m. UTC | #5
On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
> @@ -726,24 +727,53 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  /*
>   * Create a unique stateid_t to represent each COPY.
>   */
> -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> +static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
>  {
>  	int new_id;
>  
>  	idr_preload(GFP_KERNEL);
>  	spin_lock(&nn->s2s_cp_lock);
> -	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
>  	spin_unlock(&nn->s2s_cp_lock);
>  	idr_preload_end();
>  	if (new_id < 0)
>  		return 0;
> -	copy->cp_stateid.si_opaque.so_id = new_id;
> -	copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> -	copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	stid->si_opaque.so_id = new_id;
> +	stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> +	stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
>  	return 1;
>  }
>  
> -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> +{
> +	return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
> +}

This little bit of refactoring could go into a seperate patch.  It's
easier for me to review lots of smaller patches.

But I don't understand why you're doing it.

Also, I'm a little suspicious of code that doesn't initialize an object
till after it's been added to a global structure.  The more typical
pattern is:


	initialize foo
	take locks, add foo global structure, drop locks.

This prevents anyone doing a lookup from finding "foo" while it's still
in a partially initialized state.

--b.
Olga Kornievskaia July 22, 2019, 8:03 p.m. UTC | #6
On Wed, Jul 17, 2019 at 6:15 PM J. Bruce Fields <bfields@fieldses.org> wrote:
>
> On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <kolga@netapp.com>
> >
> > Introducing the COPY_NOTIFY operation.
> >
> > Create a new unique stateid that will keep track of the copy
> > state and the upcoming READs that will use that stateid. Keep
> > it in the list associated with parent stateid.
> >
> > Return single netaddr to advertise to the copy.
> >
> > Signed-off-by: Andy Adamson <andros@netapp.com>
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >  fs/nfsd/nfs4proc.c  | 71 +++++++++++++++++++++++++++++++++++----
> >  fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++++++----
> >  fs/nfsd/nfs4xdr.c   | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  fs/nfsd/state.h     | 18 ++++++++--
> >  fs/nfsd/xdr4.h      | 13 +++++++
> >  5 files changed, 247 insertions(+), 16 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index cfd8767..c39fa72 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -37,6 +37,7 @@
> >  #include <linux/falloc.h>
> >  #include <linux/slab.h>
> >  #include <linux/kthread.h>
> > +#include <linux/sunrpc/addr.h>
> >
> >  #include "idmap.h"
> >  #include "cache.h"
> > @@ -1033,7 +1034,8 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
> >  static __be32
> >  nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >                 stateid_t *src_stateid, struct file **src,
> > -               stateid_t *dst_stateid, struct file **dst)
> > +               stateid_t *dst_stateid, struct file **dst,
> > +               struct nfs4_stid **stid)
> >  {
> >       __be32 status;
> >
> > @@ -1050,7 +1052,7 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
> >
> >       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> >                                           dst_stateid, WR_STATE, dst, NULL,
> > -                                         NULL);
> > +                                         stid);
>
> Doesn't this belong with the previous patch?

I could. I think what Andy did was first changed the code to add
ability to add to the nfs4_stid and this patch actually uses it.
Sounds like you prefer this to be in the previous patch?

>
> --b.
Olga Kornievskaia July 22, 2019, 8:17 p.m. UTC | #7
On Wed, Jul 17, 2019 at 7:07 PM J. Bruce Fields <bfields@fieldses.org> wrote:
>
> On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
> > @@ -726,24 +727,53 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> >  /*
> >   * Create a unique stateid_t to represent each COPY.
> >   */
> > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > +static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
> >  {
> >       int new_id;
> >
> >       idr_preload(GFP_KERNEL);
> >       spin_lock(&nn->s2s_cp_lock);
> > -     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> > +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
> >       spin_unlock(&nn->s2s_cp_lock);
> >       idr_preload_end();
> >       if (new_id < 0)
> >               return 0;
> > -     copy->cp_stateid.si_opaque.so_id = new_id;
> > -     copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > -     copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > +     stid->si_opaque.so_id = new_id;
> > +     stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> > +     stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> >       return 1;
> >  }
> >
> > -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > +{
> > +     return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
> > +}
>
> This little bit of refactoring could go into a seperate patch.  It's
> easier for me to review lots of smaller patches.
>
> But I don't understand why you're doing it.
>
> Also, I'm a little suspicious of code that doesn't initialize an object
> till after it's been added to a global structure.  The more typical
> pattern is:
>
>
>         initialize foo
>         take locks, add foo global structure, drop locks.
>
> This prevents anyone doing a lookup from finding "foo" while it's still
> in a partially initialized state.

Let me try to explain the change. This change is due to the fact that
now both COPY_NOTIFY and COPY both are generating unique stateid
(COPY_NOTIFY needs a unique stateid to passed into the COPY and COPY
is generating a unique stateid to be referred to by callbacks).
Previously we had just the COPY generating the stateid (so it was
stored in the nfs4_copy structure) but now we have the COPY_NOTIFY
which doesn't create nfs4_copy when it's processing the operation but
still needs a unique stateid (stored in the stateid structure).

Let me see if I understand your suspicion and ask for guidance how to
resolve it as perhaps I'm misusing the function. idr_alloc_cyclic()
keeps track of the structure of the 2nd arguments with a value it
returns. How do I initiate the structure with the value of the
function without knowing the value which can only be returned when I
call the function to add it to the list? what you are suggesting is to
somehow get the value for the new_id but not associate anything then
update the copy structure with that value and then call
idr_alloc_cyclic() (or something else) to create that association of
the new_id and the structure? I don't know how to do that.

>
> --b.
J. Bruce Fields July 23, 2019, 8:45 p.m. UTC | #8
On Mon, Jul 22, 2019 at 04:17:44PM -0400, Olga Kornievskaia wrote:
> On Wed, Jul 17, 2019 at 7:07 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> >
> > On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
> > > @@ -726,24 +727,53 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> > >  /*
> > >   * Create a unique stateid_t to represent each COPY.
> > >   */
> > > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > > +static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
> > >  {
> > >       int new_id;
> > >
> > >       idr_preload(GFP_KERNEL);
> > >       spin_lock(&nn->s2s_cp_lock);
> > > -     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> > > +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
> > >       spin_unlock(&nn->s2s_cp_lock);
> > >       idr_preload_end();
> > >       if (new_id < 0)
> > >               return 0;
> > > -     copy->cp_stateid.si_opaque.so_id = new_id;
> > > -     copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > > -     copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > > +     stid->si_opaque.so_id = new_id;
> > > +     stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> > > +     stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > >       return 1;
> > >  }
> > >
> > > -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > > +{
> > > +     return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
> > > +}
> >
> > This little bit of refactoring could go into a seperate patch.  It's
> > easier for me to review lots of smaller patches.
> >
> > But I don't understand why you're doing it.
> >
> > Also, I'm a little suspicious of code that doesn't initialize an object
> > till after it's been added to a global structure.  The more typical
> > pattern is:
> >
> >
> >         initialize foo
> >         take locks, add foo global structure, drop locks.
> >
> > This prevents anyone doing a lookup from finding "foo" while it's still
> > in a partially initialized state.
> 
> Let me try to explain the change. This change is due to the fact that
> now both COPY_NOTIFY and COPY both are generating unique stateid
> (COPY_NOTIFY needs a unique stateid to passed into the COPY and COPY
> is generating a unique stateid to be referred to by callbacks).
> Previously we had just the COPY generating the stateid (so it was
> stored in the nfs4_copy structure) but now we have the COPY_NOTIFY
> which doesn't create nfs4_copy when it's processing the operation but
> still needs a unique stateid (stored in the stateid structure).

The usual way to handle a situation like this is to store in the idr a
pointer to the stateid (copy->cp_stateid or cps->cp_stateid).  When you
do a lookup you do something like:

	st = idr_find(...);
	copy = container_of(st, struct nfsd4_copy, cp_stateid);

to get a copy to the larger structure.

By the way, in find_internal_cpntf_state, a buggy or malicious client
could cause idr_find to look up a copy (not a copy_notify) stateid.  The
code needs some way to distinguish the two cases.  You could use a
different cl_id for the two cases.  That might also be handy for
debugging.  And/or you could do as we do in the case of open, lock, and
other stateid's and embed a common structure that also includes a "type"
field.  (See nfs4_stid->sc_type).

> Let me see if I understand your suspicion and ask for guidance how to
> resolve it as perhaps I'm misusing the function. idr_alloc_cyclic()
> keeps track of the structure of the 2nd arguments with a value it
> returns. How do I initiate the structure with the value of the
> function without knowing the value which can only be returned when I
> call the function to add it to the list? what you are suggesting is to
> somehow get the value for the new_id but not associate anything then
> update the copy structure with that value and then call
> idr_alloc_cyclic() (or something else) to create that association of
> the new_id and the structure? I don't know how to do that.

You could move the initialization under the s2s_cp_lock.  But there's
additional initialization that's done in the caller.

So, either this needs more locking, or maybe some flag value set to
indicate that the object is initialized and safe to use.  (In the case
of open/lock/etc.  stateid's I think that is sc_type.  I'm not
completely convinced we've got that correct, though.)

--b.
Olga Kornievskaia July 30, 2019, 3:48 p.m. UTC | #9
On Tue, Jul 23, 2019 at 4:46 PM J. Bruce Fields <bfields@fieldses.org> wrote:
>
> On Mon, Jul 22, 2019 at 04:17:44PM -0400, Olga Kornievskaia wrote:
> > On Wed, Jul 17, 2019 at 7:07 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> > >
> > > On Mon, Jul 08, 2019 at 03:23:48PM -0400, Olga Kornievskaia wrote:
> > > > @@ -726,24 +727,53 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> > > >  /*
> > > >   * Create a unique stateid_t to represent each COPY.
> > > >   */
> > > > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > > > +static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
> > > >  {
> > > >       int new_id;
> > > >
> > > >       idr_preload(GFP_KERNEL);
> > > >       spin_lock(&nn->s2s_cp_lock);
> > > > -     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> > > > +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
> > > >       spin_unlock(&nn->s2s_cp_lock);
> > > >       idr_preload_end();
> > > >       if (new_id < 0)
> > > >               return 0;
> > > > -     copy->cp_stateid.si_opaque.so_id = new_id;
> > > > -     copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > > > -     copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > > > +     stid->si_opaque.so_id = new_id;
> > > > +     stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> > > > +     stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > > >       return 1;
> > > >  }
> > > >
> > > > -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > > > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > > > +{
> > > > +     return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
> > > > +}
> > >
> > > This little bit of refactoring could go into a seperate patch.  It's
> > > easier for me to review lots of smaller patches.
> > >
> > > But I don't understand why you're doing it.
> > >
> > > Also, I'm a little suspicious of code that doesn't initialize an object
> > > till after it's been added to a global structure.  The more typical
> > > pattern is:
> > >
> > >
> > >         initialize foo
> > >         take locks, add foo global structure, drop locks.
> > >
> > > This prevents anyone doing a lookup from finding "foo" while it's still
> > > in a partially initialized state.
> >
> > Let me try to explain the change. This change is due to the fact that
> > now both COPY_NOTIFY and COPY both are generating unique stateid
> > (COPY_NOTIFY needs a unique stateid to passed into the COPY and COPY
> > is generating a unique stateid to be referred to by callbacks).
> > Previously we had just the COPY generating the stateid (so it was
> > stored in the nfs4_copy structure) but now we have the COPY_NOTIFY
> > which doesn't create nfs4_copy when it's processing the operation but
> > still needs a unique stateid (stored in the stateid structure).
>
> The usual way to handle a situation like this is to store in the idr a
> pointer to the stateid (copy->cp_stateid or cps->cp_stateid).  When you

Ok I'll store the stateid directly.

> do a lookup you do something like:
>
>         st = idr_find(...);
>         copy = container_of(st, struct nfsd4_copy, cp_stateid);
>
> to get a copy to the larger structure.
>
> By the way, in find_internal_cpntf_state, a buggy or malicious client
> could cause idr_find to look up a copy (not a copy_notify) stateid.  The
> code needs some way to distinguish the two cases.  You could use a
> different cl_id for the two cases.  That might also be handy for
> debugging.  And/or you could do as we do in the case of open, lock, and
> other stateid's and embed a common structure that also includes a "type"
> field.  (See nfs4_stid->sc_type).

I'll add 2 new sc_types and make sure that during the look up the
entry retrieved is of the appropriate type.


> > Let me see if I understand your suspicion and ask for guidance how to
> > resolve it as perhaps I'm misusing the function. idr_alloc_cyclic()
> > keeps track of the structure of the 2nd arguments with a value it
> > returns. How do I initiate the structure with the value of the
> > function without knowing the value which can only be returned when I
> > call the function to add it to the list? what you are suggesting is to
> > somehow get the value for the new_id but not associate anything then
> > update the copy structure with that value and then call
> > idr_alloc_cyclic() (or something else) to create that association of
> > the new_id and the structure? I don't know how to do that.
>
> You could move the initialization under the s2s_cp_lock.  But there's
> additional initialization that's done in the caller.

I still don't understand what you are looking for here and why. I'm
following what the normal stid allocation does. There is no extra code
there to see if it initiated or not. nfs4_alloc_stid() calls
idr_alloc_cyclic() creates an association between the stid pointer and
at the time uninitialized nfs4_stid structure which is then filled in
with the return of the idr_alloc_cyclic(). That's exactly what the new
code is doing (well accept that i'll change it to store the
stateid_t).

> So, either this needs more locking, or maybe some flag value set to
> indicate that the object is initialized and safe to use.  (In the case
> of open/lock/etc.  stateid's I think that is sc_type.  I'm not
> completely convinced we've got that correct, though.)
>
> --b.
J. Bruce Fields July 30, 2019, 3:55 p.m. UTC | #10
On Tue, Jul 30, 2019 at 11:48:27AM -0400, Olga Kornievskaia wrote:
> On Tue, Jul 23, 2019 at 4:46 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> >
> > On Mon, Jul 22, 2019 at 04:17:44PM -0400, Olga Kornievskaia wrote:
> > > Let me see if I understand your suspicion and ask for guidance how to
> > > resolve it as perhaps I'm misusing the function. idr_alloc_cyclic()
> > > keeps track of the structure of the 2nd arguments with a value it
> > > returns. How do I initiate the structure with the value of the
> > > function without knowing the value which can only be returned when I
> > > call the function to add it to the list? what you are suggesting is to
> > > somehow get the value for the new_id but not associate anything then
> > > update the copy structure with that value and then call
> > > idr_alloc_cyclic() (or something else) to create that association of
> > > the new_id and the structure? I don't know how to do that.
> >
> > You could move the initialization under the s2s_cp_lock.  But there's
> > additional initialization that's done in the caller.
> 
> I still don't understand what you are looking for here and why. I'm
> following what the normal stid allocation does.  There is no extra code
> there to see if it initiated or not. nfs4_alloc_stid() calls
> idr_alloc_cyclic() creates an association between the stid pointer and
> at the time uninitialized nfs4_stid structure which is then filled in
> with the return of the idr_alloc_cyclic(). That's exactly what the new
> code is doing (well accept that i'll change it to store the
> stateid_t).

Yes, I'm a little worried about normal stid allocation too.  It's got
one extra safeguard because of the check for 0 sc_type in the lookup,
I haven't yet convinced myself that's enough.

The race I'm worried about is: one task does the idr allocation and
drops locks.  Before it has the chance to finish initializing the
object, a second task looks it up in the idr and does something with it.
It sees the not-yet-initialized fields.

--b.
Olga Kornievskaia July 30, 2019, 4:13 p.m. UTC | #11
On Tue, Jul 30, 2019 at 11:55 AM J. Bruce Fields <bfields@fieldses.org> wrote:
>
> On Tue, Jul 30, 2019 at 11:48:27AM -0400, Olga Kornievskaia wrote:
> > On Tue, Jul 23, 2019 at 4:46 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> > >
> > > On Mon, Jul 22, 2019 at 04:17:44PM -0400, Olga Kornievskaia wrote:
> > > > Let me see if I understand your suspicion and ask for guidance how to
> > > > resolve it as perhaps I'm misusing the function. idr_alloc_cyclic()
> > > > keeps track of the structure of the 2nd arguments with a value it
> > > > returns. How do I initiate the structure with the value of the
> > > > function without knowing the value which can only be returned when I
> > > > call the function to add it to the list? what you are suggesting is to
> > > > somehow get the value for the new_id but not associate anything then
> > > > update the copy structure with that value and then call
> > > > idr_alloc_cyclic() (or something else) to create that association of
> > > > the new_id and the structure? I don't know how to do that.
> > >
> > > You could move the initialization under the s2s_cp_lock.  But there's
> > > additional initialization that's done in the caller.
> >
> > I still don't understand what you are looking for here and why. I'm
> > following what the normal stid allocation does.  There is no extra code
> > there to see if it initiated or not. nfs4_alloc_stid() calls
> > idr_alloc_cyclic() creates an association between the stid pointer and
> > at the time uninitialized nfs4_stid structure which is then filled in
> > with the return of the idr_alloc_cyclic(). That's exactly what the new
> > code is doing (well accept that i'll change it to store the
> > stateid_t).
>
> Yes, I'm a little worried about normal stid allocation too.  It's got
> one extra safeguard because of the check for 0 sc_type in the lookup,
> I haven't yet convinced myself that's enough.
>
> The race I'm worried about is: one task does the idr allocation and
> drops locks.  Before it has the chance to finish initializing the
> object, a second task looks it up in the idr and does something with it.
> It sees the not-yet-initialized fields.

Can the spin_lock() that we call before the idr_alloc_cyclic() be held
thru the initialization of the stid then? I'm just not sure what this
idr_preload_end() with a spin_lock but otherwise I don't see why we
can't and since idr_find() takes the same spin lock before the call,
it would solve the problem.

>
> --b.
Olga Kornievskaia July 30, 2019, 5:10 p.m. UTC | #12
On Tue, Jul 30, 2019 at 12:13 PM Olga Kornievskaia
<olga.kornievskaia@gmail.com> wrote:
>
> On Tue, Jul 30, 2019 at 11:55 AM J. Bruce Fields <bfields@fieldses.org> wrote:
> >
> > On Tue, Jul 30, 2019 at 11:48:27AM -0400, Olga Kornievskaia wrote:
> > > On Tue, Jul 23, 2019 at 4:46 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> > > >
> > > > On Mon, Jul 22, 2019 at 04:17:44PM -0400, Olga Kornievskaia wrote:
> > > > > Let me see if I understand your suspicion and ask for guidance how to
> > > > > resolve it as perhaps I'm misusing the function. idr_alloc_cyclic()
> > > > > keeps track of the structure of the 2nd arguments with a value it
> > > > > returns. How do I initiate the structure with the value of the
> > > > > function without knowing the value which can only be returned when I
> > > > > call the function to add it to the list? what you are suggesting is to
> > > > > somehow get the value for the new_id but not associate anything then
> > > > > update the copy structure with that value and then call
> > > > > idr_alloc_cyclic() (or something else) to create that association of
> > > > > the new_id and the structure? I don't know how to do that.
> > > >
> > > > You could move the initialization under the s2s_cp_lock.  But there's
> > > > additional initialization that's done in the caller.
> > >
> > > I still don't understand what you are looking for here and why. I'm
> > > following what the normal stid allocation does.  There is no extra code
> > > there to see if it initiated or not. nfs4_alloc_stid() calls
> > > idr_alloc_cyclic() creates an association between the stid pointer and
> > > at the time uninitialized nfs4_stid structure which is then filled in
> > > with the return of the idr_alloc_cyclic(). That's exactly what the new
> > > code is doing (well accept that i'll change it to store the
> > > stateid_t).
> >
> > Yes, I'm a little worried about normal stid allocation too.  It's got
> > one extra safeguard because of the check for 0 sc_type in the lookup,
> > I haven't yet convinced myself that's enough.
> >
> > The race I'm worried about is: one task does the idr allocation and
> > drops locks.  Before it has the chance to finish initializing the
> > object, a second task looks it up in the idr and does something with it.
> > It sees the not-yet-initialized fields.
>
> Can the spin_lock() that we call before the idr_alloc_cyclic() be held
> thru the initialization of the stid then? I'm just not sure what this
> idr_preload_end() with a spin_lock but otherwise I don't see why we
> can't and since idr_find() takes the same spin lock before the call,
> it would solve the problem.

actually instead moving initialization of other stid fields prior to
calling the idr_alloc_cycle would never expose the un-initialized
value so

stid->..cl_boot = nn->boot_time
stid->.. cl_id = nn->..id
..
spinlock()
newid = idr_alloc_cycle(stid)
stid->..id = newid
unlock()


>
> >
> > --b.
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index cfd8767..c39fa72 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,7 @@ 
 #include <linux/falloc.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/sunrpc/addr.h>
 
 #include "idmap.h"
 #include "cache.h"
@@ -1033,7 +1034,8 @@  static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		  stateid_t *src_stateid, struct file **src,
-		  stateid_t *dst_stateid, struct file **dst)
+		  stateid_t *dst_stateid, struct file **dst,
+		  struct nfs4_stid **stid)
 {
 	__be32 status;
 
@@ -1050,7 +1052,7 @@  static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    dst_stateid, WR_STATE, dst, NULL,
-					    NULL);
+					    stid);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
@@ -1081,7 +1083,7 @@  static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 	__be32 status;
 
 	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
-				   &clone->cl_dst_stateid, &dst);
+				   &clone->cl_dst_stateid, &dst, NULL);
 	if (status)
 		goto out;
 
@@ -1228,7 +1230,7 @@  static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
-	nfs4_free_cp_state(copy);
+	nfs4_free_copy_state(copy);
 	fput(copy->file_dst);
 	fput(copy->file_src);
 	spin_lock(&copy->cp_clp->async_lock);
@@ -1268,7 +1270,7 @@  static int nfsd4_do_async_copy(void *data)
 
 	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
 				   &copy->file_src, &copy->cp_dst_stateid,
-				   &copy->file_dst);
+				   &copy->file_dst, NULL);
 	if (status)
 		goto out;
 
@@ -1282,7 +1284,7 @@  static int nfsd4_do_async_copy(void *data)
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy)
 			goto out;
-		if (!nfs4_init_cp_state(nn, copy)) {
+		if (!nfs4_init_copy_state(nn, copy)) {
 			kfree(async_copy);
 			goto out;
 		}
@@ -1346,6 +1348,42 @@  struct nfsd4_copy *
 }
 
 static __be32
+nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  union nfsd4_op_u *u)
+{
+	struct nfsd4_copy_notify *cn = &u->copy_notify;
+	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfs4_stid *stid;
+	struct nfs4_cpntf_state *cps;
+
+	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+					&cn->cpn_src_stateid, RD_STATE, NULL,
+					NULL, &stid);
+	if (status)
+		return status;
+
+	cn->cpn_sec = nn->nfsd4_lease;
+	cn->cpn_nsec = 0;
+
+	status = nfserrno(-ENOMEM);
+	cps = nfs4_alloc_init_cpntf_state(nn, stid);
+	if (!cps)
+		return status;
+	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid, sizeof(stateid_t));
+
+	/* For now, only return one server address in cpn_src, the
+	 * address used by the client to connect to this server.
+	 */
+	cn->cpn_src.nl4_type = NL4_NETADDR;
+	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+				 &cn->cpn_src.u.nl4_addr);
+	WARN_ON_ONCE(status);
+
+	return status;
+}
+
+static __be32
 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		struct nfsd4_fallocate *fallocate, int flags)
 {
@@ -2298,6 +2336,21 @@  static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
 		1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+					struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		3 /* cnr_lease_time */ +
+		1 /* We support one cnr_source_server */ +
+		1 /* cnr_stateid seq */ +
+		op_encode_stateid_maxsz /* cnr_stateid */ +
+		1 /* num cnr_source_server*/ +
+		1 /* nl4_type */ +
+		1 /* nl4 size */ +
+		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */)
+		* sizeof(__be32);
+}
+
 #ifdef CONFIG_NFSD_PNFS
 static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
@@ -2722,6 +2775,12 @@  static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 		.op_name = "OP_OFFLOAD_CANCEL",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
+	[OP_COPY_NOTIFY] = {
+		.op_func = nfsd4_copy_notify,
+		.op_flags = OP_MODIFIES_SOMETHING,
+		.op_name = "OP_COPY_NOTIFY",
+		.op_rsize_bop = nfsd4_copy_notify_rsize,
+	},
 };
 
 /**
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 05c0295..2555eb9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -707,6 +707,7 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	/* Will be incremented before return to client: */
 	refcount_set(&stid->sc_count, 1);
 	spin_lock_init(&stid->sc_lock);
+	INIT_LIST_HEAD(&stid->sc_cp_list);
 
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -726,24 +727,53 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 /*
  * Create a unique stateid_t to represent each COPY.
  */
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+static int nfs4_init_cp_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
 {
 	int new_id;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->s2s_cp_lock);
-	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
 	spin_unlock(&nn->s2s_cp_lock);
 	idr_preload_end();
 	if (new_id < 0)
 		return 0;
-	copy->cp_stateid.si_opaque.so_id = new_id;
-	copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
-	copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	stid->si_opaque.so_id = new_id;
+	stid->si_opaque.so_clid.cl_boot = nn->boot_time;
+	stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
 	return 1;
 }
 
-void nfs4_free_cp_state(struct nfsd4_copy *copy)
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+{
+	return nfs4_init_cp_state(nn, copy, &copy->cp_stateid);
+}
+
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+						     struct nfs4_stid *p_stid)
+{
+	struct nfs4_cpntf_state *cps;
+
+	cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
+	if (!cps)
+		return NULL;
+	if (!nfs4_init_cp_state(nn, cps, &cps->cp_stateid))
+		goto out_free;
+	cps->cp_p_stid = p_stid;
+	cps->cp_active = false;
+	cps->cp_timeout = jiffies + (nn->nfsd4_lease * HZ);
+	INIT_LIST_HEAD(&cps->cp_list);
+	spin_lock(&nn->s2s_cp_lock);
+	list_add(&cps->cp_list, &p_stid->sc_cp_list);
+	spin_unlock(&nn->s2s_cp_lock);
+
+	return cps;
+out_free:
+	kfree(cps);
+	return NULL;
+}
+
+void nfs4_free_copy_state(struct nfsd4_copy *copy)
 {
 	struct nfsd_net *nn;
 
@@ -753,6 +783,27 @@  void nfs4_free_cp_state(struct nfsd4_copy *copy)
 	spin_unlock(&nn->s2s_cp_lock);
 }
 
+static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
+{
+	struct nfs4_cpntf_state *cps;
+	struct nfsd_net *nn;
+
+	nn = net_generic(net, nfsd_net_id);
+
+	might_sleep();
+
+	spin_lock(&nn->s2s_cp_lock);
+	while (!list_empty(&stid->sc_cp_list)) {
+		cps = list_first_entry(&stid->sc_cp_list,
+				       struct nfs4_cpntf_state, cp_list);
+		list_del(&cps->cp_list);
+		idr_remove(&nn->s2s_cp_stateids,
+			   cps->cp_stateid.si_opaque.so_id);
+		kfree(cps);
+	}
+	spin_unlock(&nn->s2s_cp_lock);
+}
+
 static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
 {
 	struct nfs4_stid *stid;
@@ -901,6 +952,7 @@  static void block_delegations(struct knfsd_fh *fh)
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
 	spin_unlock(&clp->cl_lock);
+	nfs4_free_cpntf_statelist(clp->net, s);
 	s->sc_free(s);
 	if (fp)
 		put_nfs4_file(fp);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 15f53bb..ed37528 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1847,6 +1847,22 @@  static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
 }
 
 static __be32
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+			 struct nfsd4_copy_notify *cn)
+{
+	int status;
+
+	status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
+	if (status)
+		return status;
+	status = nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
+	if (status)
+		return status;
+
+	return status;
+}
+
+static __be32
 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
 	DECODE_HEAD;
@@ -1947,7 +1963,7 @@  static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
 	/* new operations for NFSv4.2 */
 	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
-	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4336,6 +4352,45 @@  static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 }
 
 static __be32
+nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct nfs42_netaddr *addr;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = cpu_to_be32(ns->nl4_type);
+
+	switch (ns->nl4_type) {
+	case NL4_NETADDR:
+		addr = &ns->u.nl4_addr;
+
+		/* netid_len, netid, uaddr_len, uaddr (port included
+		 * in RPCBIND_MAXUADDRLEN)
+		 */
+		p = xdr_reserve_space(xdr,
+			4 /* netid len */ +
+			(XDR_QUADLEN(addr->netid_len) * 4) +
+			4 /* uaddr len */ +
+			(XDR_QUADLEN(addr->addr_len) * 4));
+		if (!p)
+			return nfserr_resource;
+
+		*p++ = cpu_to_be32(addr->netid_len);
+		p = xdr_encode_opaque_fixed(p, addr->netid,
+					    addr->netid_len);
+		*p++ = cpu_to_be32(addr->addr_len);
+		p = xdr_encode_opaque_fixed(p, addr->addr,
+					addr->addr_len);
+		break;
+	default:
+		WARN_ON(ns->nl4_type != NL4_NETADDR);
+	}
+
+	return 0;
+}
+
+static __be32
 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_copy *copy)
 {
@@ -4369,6 +4424,44 @@  static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 }
 
 static __be32
+nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+			 struct nfsd4_copy_notify *cn)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
+
+	if (nfserr)
+		return nfserr;
+
+	/* 8 sec, 4 nsec */
+	p = xdr_reserve_space(xdr, 12);
+	if (!p)
+		return nfserr_resource;
+
+	/* cnr_lease_time */
+	p = xdr_encode_hyper(p, cn->cpn_sec);
+	*p++ = cpu_to_be32(cn->cpn_nsec);
+
+	/* cnr_stateid */
+	nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
+	if (nfserr)
+		return nfserr;
+
+	/* cnr_src.nl_nsvr */
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+
+	*p++ = cpu_to_be32(1);
+
+	nfserr = nfsd42_encode_nl4_server(resp, &cn->cpn_src);
+	if (nfserr)
+		return nfserr;
+
+	return nfserr;
+}
+
+static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
 {
@@ -4465,7 +4558,7 @@  static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
-	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 5da9cc3..106ed56 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -95,6 +95,7 @@  struct nfs4_stid {
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
 #define NFS4_LAYOUT_STID 64
+	struct list_head	sc_cp_list;
 	unsigned char		sc_type;
 	stateid_t		sc_stateid;
 	spinlock_t		sc_lock;
@@ -103,6 +104,17 @@  struct nfs4_stid {
 	void			(*sc_free)(struct nfs4_stid *);
 };
 
+/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid.
+ */
+struct nfs4_cpntf_state {
+	stateid_t		cp_stateid;
+	struct list_head	cp_list;	/* per parent nfs4_stid */
+	struct nfs4_stid	*cp_p_stid;	/* pointer to parent */
+	bool			cp_active;	/* has the copy started */
+	unsigned long		cp_timeout;	/* copy timeout */
+};
+
 /*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
@@ -614,8 +626,10 @@  __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     struct nfs4_stid **s, struct nfsd_net *nn);
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
 				  void (*sc_free)(struct nfs4_stid *));
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
-void nfs4_free_cp_state(struct nfsd4_copy *copy);
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
+void nfs4_free_copy_state(struct nfsd4_copy *copy);
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+			struct nfs4_stid *p_stid);
 void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 513c9ff..bade8e5 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -568,6 +568,18 @@  struct nfsd4_offload_status {
 	u32		status;
 };
 
+struct nfsd4_copy_notify {
+	/* request */
+	stateid_t		cpn_src_stateid;
+	struct nl4_server	cpn_dst;
+
+	/* response */
+	stateid_t		cpn_cnr_stateid;
+	u64			cpn_sec;
+	u32			cpn_nsec;
+	struct nl4_server	cpn_src;
+};
+
 struct nfsd4_op {
 	int					opnum;
 	const struct nfsd4_operation *		opdesc;
@@ -627,6 +639,7 @@  struct nfsd4_op {
 		struct nfsd4_clone		clone;
 		struct nfsd4_copy		copy;
 		struct nfsd4_offload_status	offload_status;
+		struct nfsd4_copy_notify	copy_notify;
 		struct nfsd4_seek		seek;
 	} u;
 	struct nfs4_replay *			replay;