diff mbox series

[v1,09/13] NFSD add COPY_NOTIFY operation

Message ID 20181019152905.32418-10-olga.kornievskaia@gmail.com (mailing list archive)
State New, archived
Headers show
Series server-side support for "inter" SSC copy | expand

Commit Message

Olga Kornievskaia Oct. 19, 2018, 3:29 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

Introducing the COPY_NOTIFY operation.

Create a new unique stateid that will keep track of the copy
state and the upcoming READs that will use that stateid. Keep
it in the list associated with parent stateid.

Return single netaddr to advertise to the copy.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/nfs4proc.c  | 69 ++++++++++++++++++++++++++++++++++---
 fs/nfsd/nfs4state.c | 70 +++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c   | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/nfsd/state.h     | 15 +++++++++
 fs/nfsd/xdr4.h      | 13 +++++++
 5 files changed, 250 insertions(+), 14 deletions(-)

Comments

J. Bruce Fields Nov. 5, 2018, 5:50 p.m. UTC | #1
On Fri, Oct 19, 2018 at 11:29:01AM -0400, Olga Kornievskaia wrote:
> @@ -1345,6 +1347,44 @@ struct nfsd4_copy *
>  }
>  
>  static __be32
> +nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> +		  union nfsd4_op_u *u)
> +{
> +	struct nfsd4_copy_notify *cn = &u->copy_notify;
> +	__be32 status;
> +	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +	struct nfs4_stid *stid;
> +	struct nfs4_cpntf_state *cps;
> +
> +	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> +					&cn->cpn_src_stateid, RD_STATE, NULL,
> +					NULL, &stid);
> +	if (status)
> +		return status;
> +
> +	cn->cpn_sec = nn->nfsd4_lease;
> +	cn->cpn_nsec = 0;
> +
> +	status = nfserrno(-ENOMEM);
> +	cps = nfs4_alloc_init_cpntf_state(nn, stid);
> +	if (!cps)
> +		return status;
> +	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid, sizeof(stateid_t));
> +
> +	/**
> +	 * For now, only return one server address in cpn_src, the
> +	 * address used by the client to connect to this server.
> +	 */
> +	cn->cpn_src.nl4_type = NL4_NETADDR;
> +	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
> +				 &cn->cpn_src.u.nl4_addr);
> +	if (status != 0)
> +		nfs4_free_cpntf_state(cps);

It looks like this would free cps while it was still on the parent's
sc_cp_list.

Is an error actually possible here? If not, just remove this check or
replace it by WARN_ON_ONCE(status).

> +
> +	return status;
> +}
...
> @@ -2720,6 +2775,12 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
>  		.op_name = "OP_OFFLOAD_CANCEL",
>  		.op_rsize_bop = nfsd4_only_status_rsize,
>  	},
> +	[OP_COPY_NOTIFY] = {
> +		.op_func = nfsd4_copy_notify,
> +		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,

CACHEME is actually only used for 4.0, let's drop it from
nfs4.1/4.2-only operations.

> +		.op_name = "OP_COPY_NOTIFY",
> +		.op_rsize_bop = nfsd4_copy_notify_rsize,
> +	},
>  };
>  
>  /**
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index e263fd0..7764a8b 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -697,6 +697,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  	/* Will be incremented before return to client: */
>  	refcount_set(&stid->sc_count, 1);
>  	spin_lock_init(&stid->sc_lock);
> +	INIT_LIST_HEAD(&stid->sc_cp_list);
>  
>  	/*
>  	 * It shouldn't be a problem to reuse an opaque stateid value.
> @@ -716,33 +717,85 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  /*
>   * Create a unique stateid_t to represent each COPY.
>   */
> -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> +int _nfs4_init_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)

This is still copy-specific code, so the names might be more
helpful if they left that in.  Maybe:

	_nfs4_init_state -> nfs4_init_cp_state
	nfs4_init_cp_state -> nfs4_init_copy_state
	nfs4_init_cp_state -> nfs4_init_cpnotify_state

?  Unless you can think of something better.

>  {
>  	int new_id;
>  
>  	idr_preload(GFP_KERNEL);
>  	spin_lock(&nn->s2s_cp_lock);
> -	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
>  	spin_unlock(&nn->s2s_cp_lock);
>  	idr_preload_end();
>  	if (new_id < 0)
>  		return 0;
> -	copy->cp_stateid.si_opaque.so_id = new_id;
> -	copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> -	copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	stid->si_opaque.so_id = new_id;
> +	stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> +	stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
>  	return 1;
>  }
>  
> -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> +int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> +{
> +	return _nfs4_init_state(nn, copy, &copy->cp_stateid);
> +}
> +
> +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
> +						     struct nfs4_stid *p_stid)
> +{
> +	struct nfs4_cpntf_state *cps;
> +
> +	cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
> +	if (!cps)
> +		return NULL;
> +	if (!_nfs4_init_state(nn, cps, &cps->cp_stateid))
> +		goto out_free;
> +	cps->cp_p_stid = p_stid;
> +	cps->cp_active = false;
> +	cps->cp_timeout = jiffies + (nn->nfsd4_lease * HZ);
> +	INIT_LIST_HEAD(&cps->cp_list);
> +	list_add(&cps->cp_list, &p_stid->sc_cp_list);

What prevents concurrent nfs4_alloc_init_cpntf_state()s from running and
corrupting this list?

> +
> +	return cps;
> +out_free:
> +	kfree(cps);
> +	return NULL;
> +}
> +
> +void _nfs4_free_state(struct net *n, stateid_t *stid)

Ditto on the naming.

>  {
>  	struct nfsd_net *nn;
>  
> -	nn = net_generic(copy->cp_clp->net, nfsd_net_id);
> +	nn = net_generic(n, nfsd_net_id);
>  	spin_lock(&nn->s2s_cp_lock);
> -	idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
> +	idr_remove(&nn->s2s_cp_stateids, stid->si_opaque.so_id);
>  	spin_unlock(&nn->s2s_cp_lock);
>  }
>  
> +void nfs4_free_cp_state(struct nfsd4_copy *copy)
> +{
> +	_nfs4_free_state(copy->cp_clp->net, &copy->cp_stateid);
> +}
> +
> +void nfs4_free_cpntf_state(struct nfs4_cpntf_state *cps)
> +{
> +	_nfs4_free_state(cps->cp_p_stid->sc_client->net, &cps->cp_stateid);
> +	kfree(cps);
> +}
> +
> +static void nfs4_free_cpntf_statelist(struct nfs4_stid *stid)
> +{
> +	struct nfs4_cpntf_state *cps;
> +
> +	might_sleep();
> +
> +	while (!list_empty(&stid->sc_cp_list)) {
> +		cps = list_first_entry(&stid->sc_cp_list,
> +				       struct nfs4_cpntf_state, cp_list);
> +		list_del(&cps->cp_list);
> +		nfs4_free_cpntf_state(cps);
> +	}

Same question on concurrent modifications of this lock--do we need some
more locking?

--b.
Olga Kornievskaia Nov. 8, 2018, 6:29 p.m. UTC | #2
On Mon, Nov 5, 2018 at 12:50 PM J. Bruce Fields <bfields@fieldses.org> wrote:
>
> On Fri, Oct 19, 2018 at 11:29:01AM -0400, Olga Kornievskaia wrote:
> > @@ -1345,6 +1347,44 @@ struct nfsd4_copy *
> >  }
> >
> >  static __be32
> > +nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > +               union nfsd4_op_u *u)
> > +{
> > +     struct nfsd4_copy_notify *cn = &u->copy_notify;
> > +     __be32 status;
> > +     struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> > +     struct nfs4_stid *stid;
> > +     struct nfs4_cpntf_state *cps;
> > +
> > +     status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> > +                                     &cn->cpn_src_stateid, RD_STATE, NULL,
> > +                                     NULL, &stid);
> > +     if (status)
> > +             return status;
> > +
> > +     cn->cpn_sec = nn->nfsd4_lease;
> > +     cn->cpn_nsec = 0;
> > +
> > +     status = nfserrno(-ENOMEM);
> > +     cps = nfs4_alloc_init_cpntf_state(nn, stid);
> > +     if (!cps)
> > +             return status;
> > +     memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid, sizeof(stateid_t));
> > +
> > +     /**
> > +      * For now, only return one server address in cpn_src, the
> > +      * address used by the client to connect to this server.
> > +      */
> > +     cn->cpn_src.nl4_type = NL4_NETADDR;
> > +     status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
> > +                              &cn->cpn_src.u.nl4_addr);
> > +     if (status != 0)
> > +             nfs4_free_cpntf_state(cps);
>
> It looks like this would free cps while it was still on the parent's
> sc_cp_list.
>
> Is an error actually possible here? If not, just remove this check or
> replace it by WARN_ON_ONCE(status).

Yes i think it can fail (socket type is not a IPv4/v6. don't know how
real it is that it's something else)  so I need to change it so that
it's freed and removed from the list.

> > +
> > +     return status;
> > +}
> ...
> > @@ -2720,6 +2775,12 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> >               .op_name = "OP_OFFLOAD_CANCEL",
> >               .op_rsize_bop = nfsd4_only_status_rsize,
> >       },
> > +     [OP_COPY_NOTIFY] = {
> > +             .op_func = nfsd4_copy_notify,
> > +             .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
>
> CACHEME is actually only used for 4.0, let's drop it from
> nfs4.1/4.2-only operations.

Ah well COPY And CLONE had the CACHEME and thus I had it. I'll remove
it. We provably need to remove it from 4.2 ops: COPY, CLONE, ALLOCATE,
DEALLOCATE...

> > +             .op_name = "OP_COPY_NOTIFY",
> > +             .op_rsize_bop = nfsd4_copy_notify_rsize,
> > +     },
> >  };
> >
> >  /**
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index e263fd0..7764a8b 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -697,6 +697,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> >       /* Will be incremented before return to client: */
> >       refcount_set(&stid->sc_count, 1);
> >       spin_lock_init(&stid->sc_lock);
> > +     INIT_LIST_HEAD(&stid->sc_cp_list);
> >
> >       /*
> >        * It shouldn't be a problem to reuse an opaque stateid value.
> > @@ -716,33 +717,85 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> >  /*
> >   * Create a unique stateid_t to represent each COPY.
> >   */
> > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > +int _nfs4_init_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
>
> This is still copy-specific code, so the names might be more
> helpful if they left that in.  Maybe:
>
>         _nfs4_init_state -> nfs4_init_cp_state
>         nfs4_init_cp_state -> nfs4_init_copy_state
>         nfs4_init_cp_state -> nfs4_init_cpnotify_state
>
> ?  Unless you can think of something better.

I will change the names.

>
> >  {
> >       int new_id;
> >
> >       idr_preload(GFP_KERNEL);
> >       spin_lock(&nn->s2s_cp_lock);
> > -     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> > +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
> >       spin_unlock(&nn->s2s_cp_lock);
> >       idr_preload_end();
> >       if (new_id < 0)
> >               return 0;
> > -     copy->cp_stateid.si_opaque.so_id = new_id;
> > -     copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > -     copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > +     stid->si_opaque.so_id = new_id;
> > +     stid->si_opaque.so_clid.cl_boot = nn->boot_time;
> > +     stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> >       return 1;
> >  }
> >
> > -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > +int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > +{
> > +     return _nfs4_init_state(nn, copy, &copy->cp_stateid);
> > +}
> > +
> > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
> > +                                                  struct nfs4_stid *p_stid)
> > +{
> > +     struct nfs4_cpntf_state *cps;
> > +
> > +     cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
> > +     if (!cps)
> > +             return NULL;
> > +     if (!_nfs4_init_state(nn, cps, &cps->cp_stateid))
> > +             goto out_free;
> > +     cps->cp_p_stid = p_stid;
> > +     cps->cp_active = false;
> > +     cps->cp_timeout = jiffies + (nn->nfsd4_lease * HZ);
> > +     INIT_LIST_HEAD(&cps->cp_list);
> > +     list_add(&cps->cp_list, &p_stid->sc_cp_list);
>
> What prevents concurrent nfs4_alloc_init_cpntf_state()s from running and
> corrupting this list?

Nothing. I'll introduce a new lock.

>
> > +
> > +     return cps;
> > +out_free:
> > +     kfree(cps);
> > +     return NULL;
> > +}
> > +
> > +void _nfs4_free_state(struct net *n, stateid_t *stid)
>
> Ditto on the naming.
>
> >  {
> >       struct nfsd_net *nn;
> >
> > -     nn = net_generic(copy->cp_clp->net, nfsd_net_id);
> > +     nn = net_generic(n, nfsd_net_id);
> >       spin_lock(&nn->s2s_cp_lock);
> > -     idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
> > +     idr_remove(&nn->s2s_cp_stateids, stid->si_opaque.so_id);
> >       spin_unlock(&nn->s2s_cp_lock);
> >  }
> >
> > +void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > +{
> > +     _nfs4_free_state(copy->cp_clp->net, &copy->cp_stateid);
> > +}
> > +
> > +void nfs4_free_cpntf_state(struct nfs4_cpntf_state *cps)
> > +{
> > +     _nfs4_free_state(cps->cp_p_stid->sc_client->net, &cps->cp_stateid);
> > +     kfree(cps);
> > +}
> > +
> > +static void nfs4_free_cpntf_statelist(struct nfs4_stid *stid)
> > +{
> > +     struct nfs4_cpntf_state *cps;
> > +
> > +     might_sleep();
> > +
> > +     while (!list_empty(&stid->sc_cp_list)) {
> > +             cps = list_first_entry(&stid->sc_cp_list,
> > +                                    struct nfs4_cpntf_state, cp_list);
> > +             list_del(&cps->cp_list);
> > +             nfs4_free_cpntf_state(cps);
> > +     }
>
> Same question on concurrent modifications of this lock--do we need some
> more locking?
>
> --b.
J. Bruce Fields Nov. 8, 2018, 6:46 p.m. UTC | #3
On Thu, Nov 08, 2018 at 01:29:26PM -0500, Olga Kornievskaia wrote:
> On Mon, Nov 5, 2018 at 12:50 PM J. Bruce Fields <bfields@fieldses.org> wrote:
> >
> > On Fri, Oct 19, 2018 at 11:29:01AM -0400, Olga Kornievskaia wrote:
> > > @@ -2720,6 +2775,12 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > >               .op_name = "OP_OFFLOAD_CANCEL",
> > >               .op_rsize_bop = nfsd4_only_status_rsize,
> > >       },
> > > +     [OP_COPY_NOTIFY] = {
> > > +             .op_func = nfsd4_copy_notify,
> > > +             .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
> >
> > CACHEME is actually only used for 4.0, let's drop it from
> > nfs4.1/4.2-only operations.
> 
> Ah well COPY And CLONE had the CACHEME and thus I had it. I'll remove
> it. We provably need to remove it from 4.2 ops: COPY, CLONE, ALLOCATE,
> DEALLOCATE...

OK, that could be a separate patch.

--b.
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 29686df..999c8cb 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,7 @@ 
 #include <linux/falloc.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/sunrpc/addr.h>
 
 #include "idmap.h"
 #include "cache.h"
@@ -1035,7 +1036,8 @@  static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		  stateid_t *src_stateid, struct file **src,
-		  stateid_t *dst_stateid, struct file **dst)
+		  stateid_t *dst_stateid, struct file **dst,
+		  struct nfs4_stid **stid)
 {
 	__be32 status;
 
@@ -1049,7 +1051,7 @@  static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    dst_stateid, WR_STATE, dst, NULL,
-					    NULL);
+					    stid);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
@@ -1080,7 +1082,7 @@  static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 	__be32 status;
 
 	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
-				   &clone->cl_dst_stateid, &dst);
+				   &clone->cl_dst_stateid, &dst, NULL);
 	if (status)
 		goto out;
 
@@ -1267,7 +1269,7 @@  static int nfsd4_do_async_copy(void *data)
 
 	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
 				   &copy->file_src, &copy->cp_dst_stateid,
-				   &copy->file_dst);
+				   &copy->file_dst, NULL);
 	if (status)
 		goto out;
 
@@ -1345,6 +1347,44 @@  struct nfsd4_copy *
 }
 
 static __be32
+nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  union nfsd4_op_u *u)
+{
+	struct nfsd4_copy_notify *cn = &u->copy_notify;
+	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfs4_stid *stid;
+	struct nfs4_cpntf_state *cps;
+
+	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+					&cn->cpn_src_stateid, RD_STATE, NULL,
+					NULL, &stid);
+	if (status)
+		return status;
+
+	cn->cpn_sec = nn->nfsd4_lease;
+	cn->cpn_nsec = 0;
+
+	status = nfserrno(-ENOMEM);
+	cps = nfs4_alloc_init_cpntf_state(nn, stid);
+	if (!cps)
+		return status;
+	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid, sizeof(stateid_t));
+
+	/**
+	 * For now, only return one server address in cpn_src, the
+	 * address used by the client to connect to this server.
+	 */
+	cn->cpn_src.nl4_type = NL4_NETADDR;
+	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+				 &cn->cpn_src.u.nl4_addr);
+	if (status != 0)
+		nfs4_free_cpntf_state(cps);
+
+	return status;
+}
+
+static __be32
 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		struct nfsd4_fallocate *fallocate, int flags)
 {
@@ -2296,6 +2336,21 @@  static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
 		1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+					struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		3 /* cnr_lease_time */ +
+		1 /* We support one cnr_source_server */ +
+		1 /* cnr_stateid seq */ +
+		op_encode_stateid_maxsz /* cnr_stateid */ +
+		1 /* num cnr_source_server*/ +
+		1 /* nl4_type */ +
+		1 /* nl4 size */ +
+		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */)
+		* sizeof(__be32);
+}
+
 #ifdef CONFIG_NFSD_PNFS
 static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
@@ -2720,6 +2775,12 @@  static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 		.op_name = "OP_OFFLOAD_CANCEL",
 		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
+	[OP_COPY_NOTIFY] = {
+		.op_func = nfsd4_copy_notify,
+		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+		.op_name = "OP_COPY_NOTIFY",
+		.op_rsize_bop = nfsd4_copy_notify_rsize,
+	},
 };
 
 /**
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e263fd0..7764a8b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -697,6 +697,7 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	/* Will be incremented before return to client: */
 	refcount_set(&stid->sc_count, 1);
 	spin_lock_init(&stid->sc_lock);
+	INIT_LIST_HEAD(&stid->sc_cp_list);
 
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -716,33 +717,85 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 /*
  * Create a unique stateid_t to represent each COPY.
  */
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+int _nfs4_init_state(struct nfsd_net *nn, void *ptr, stateid_t *stid)
 {
 	int new_id;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->s2s_cp_lock);
-	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, ptr, 0, 0, GFP_NOWAIT);
 	spin_unlock(&nn->s2s_cp_lock);
 	idr_preload_end();
 	if (new_id < 0)
 		return 0;
-	copy->cp_stateid.si_opaque.so_id = new_id;
-	copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
-	copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	stid->si_opaque.so_id = new_id;
+	stid->si_opaque.so_clid.cl_boot = nn->boot_time;
+	stid->si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
 	return 1;
 }
 
-void nfs4_free_cp_state(struct nfsd4_copy *copy)
+int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+{
+	return _nfs4_init_state(nn, copy, &copy->cp_stateid);
+}
+
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+						     struct nfs4_stid *p_stid)
+{
+	struct nfs4_cpntf_state *cps;
+
+	cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
+	if (!cps)
+		return NULL;
+	if (!_nfs4_init_state(nn, cps, &cps->cp_stateid))
+		goto out_free;
+	cps->cp_p_stid = p_stid;
+	cps->cp_active = false;
+	cps->cp_timeout = jiffies + (nn->nfsd4_lease * HZ);
+	INIT_LIST_HEAD(&cps->cp_list);
+	list_add(&cps->cp_list, &p_stid->sc_cp_list);
+
+	return cps;
+out_free:
+	kfree(cps);
+	return NULL;
+}
+
+void _nfs4_free_state(struct net *n, stateid_t *stid)
 {
 	struct nfsd_net *nn;
 
-	nn = net_generic(copy->cp_clp->net, nfsd_net_id);
+	nn = net_generic(n, nfsd_net_id);
 	spin_lock(&nn->s2s_cp_lock);
-	idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
+	idr_remove(&nn->s2s_cp_stateids, stid->si_opaque.so_id);
 	spin_unlock(&nn->s2s_cp_lock);
 }
 
+void nfs4_free_cp_state(struct nfsd4_copy *copy)
+{
+	_nfs4_free_state(copy->cp_clp->net, &copy->cp_stateid);
+}
+
+void nfs4_free_cpntf_state(struct nfs4_cpntf_state *cps)
+{
+	_nfs4_free_state(cps->cp_p_stid->sc_client->net, &cps->cp_stateid);
+	kfree(cps);
+}
+
+static void nfs4_free_cpntf_statelist(struct nfs4_stid *stid)
+{
+	struct nfs4_cpntf_state *cps;
+
+	might_sleep();
+
+	while (!list_empty(&stid->sc_cp_list)) {
+		cps = list_first_entry(&stid->sc_cp_list,
+				       struct nfs4_cpntf_state, cp_list);
+		list_del(&cps->cp_list);
+		nfs4_free_cpntf_state(cps);
+	}
+}
+
 static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
 {
 	struct nfs4_stid *stid;
@@ -891,6 +944,7 @@  static void block_delegations(struct knfsd_fh *fh)
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
 	spin_unlock(&clp->cl_lock);
+	nfs4_free_cpntf_statelist(s);
 	s->sc_free(s);
 	if (fp)
 		put_nfs4_file(fp);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9f6886f..d4945a8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1839,6 +1839,22 @@  static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
 }
 
 static __be32
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+			 struct nfsd4_copy_notify *cn)
+{
+	int status;
+
+	status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
+	if (status)
+		return status;
+	status = nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
+	if (status)
+		return status;
+
+	return status;
+}
+
+static __be32
 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
 	DECODE_HEAD;
@@ -1939,7 +1955,7 @@  static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
 	/* new operations for NFSv4.2 */
 	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
-	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
 	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4324,6 +4340,45 @@  static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 }
 
 static __be32
+nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct nfs42_netaddr *addr;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = cpu_to_be32(ns->nl4_type);
+
+	switch (ns->nl4_type) {
+	case NL4_NETADDR:
+		addr = &ns->u.nl4_addr;
+
+		/** netid_len, netid, uaddr_len, uaddr (port included
+		 * in RPCBIND_MAXUADDRLEN)
+		 */
+		p = xdr_reserve_space(xdr,
+			4 /* netid len */ +
+			(XDR_QUADLEN(addr->netid_len) * 4) +
+			4 /* uaddr len */ +
+			(XDR_QUADLEN(addr->addr_len) * 4));
+		if (!p)
+			return nfserr_resource;
+
+		*p++ = cpu_to_be32(addr->netid_len);
+		p = xdr_encode_opaque_fixed(p, addr->netid,
+					    addr->netid_len);
+		*p++ = cpu_to_be32(addr->addr_len);
+		p = xdr_encode_opaque_fixed(p, addr->addr,
+					addr->addr_len);
+		break;
+	default:
+		WARN_ON(ns->nl4_type != NL4_NETADDR);
+	}
+
+	return 0;
+}
+
+static __be32
 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_copy *copy)
 {
@@ -4360,6 +4415,44 @@  static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 }
 
 static __be32
+nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+			 struct nfsd4_copy_notify *cn)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
+
+	if (nfserr)
+		return nfserr;
+
+	/* 8 sec, 4 nsec */
+	p = xdr_reserve_space(xdr, 12);
+	if (!p)
+		return nfserr_resource;
+
+	/* cnr_lease_time */
+	p = xdr_encode_hyper(p, cn->cpn_sec);
+	*p++ = cpu_to_be32(cn->cpn_nsec);
+
+	/* cnr_stateid */
+	nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
+	if (nfserr)
+		return nfserr;
+
+	/* cnr_src.nl_nsvr */
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+
+	*p++ = cpu_to_be32(1);
+
+	nfserr = nfsd42_encode_nl4_server(resp, &cn->cpn_src);
+	if (nfserr)
+		return nfserr;
+
+	return nfserr;
+}
+
+static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
 {
@@ -4456,7 +4549,7 @@  static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
-	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
 	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 304de3b..6bfac6a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -94,6 +94,7 @@  struct nfs4_stid {
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
 #define NFS4_LAYOUT_STID 64
+	struct list_head	sc_cp_list;
 	unsigned char		sc_type;
 	stateid_t		sc_stateid;
 	spinlock_t		sc_lock;
@@ -102,6 +103,17 @@  struct nfs4_stid {
 	void			(*sc_free)(struct nfs4_stid *);
 };
 
+/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid.
+ */
+struct nfs4_cpntf_state {
+	stateid_t		cp_stateid;
+	struct list_head	cp_list;	/* per parent nfs4_stid */
+	struct nfs4_stid	*cp_p_stid;	/* pointer to parent */
+	bool			cp_active;	/* has the copy started */
+	unsigned long		cp_timeout;	/* copy timeout */
+};
+
 /*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
@@ -615,6 +627,9 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 				  void (*sc_free)(struct nfs4_stid *));
 int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
 void nfs4_free_cp_state(struct nfsd4_copy *copy);
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+			struct nfs4_stid *p_stid);
+void nfs4_free_cpntf_state(struct nfs4_cpntf_state *cps);
 void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index b4d1140..4557f15 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -567,6 +567,18 @@  struct nfsd4_offload_status {
 	u32		status;
 };
 
+struct nfsd4_copy_notify {
+	/* request */
+	stateid_t		cpn_src_stateid;
+	struct nl4_server	cpn_dst;
+
+	/* response */
+	stateid_t		cpn_cnr_stateid;
+	u64			cpn_sec;
+	u32			cpn_nsec;
+	struct nl4_server	cpn_src;
+};
+
 struct nfsd4_op {
 	int					opnum;
 	const struct nfsd4_operation *		opdesc;
@@ -626,6 +638,7 @@  struct nfsd4_op {
 		struct nfsd4_clone		clone;
 		struct nfsd4_copy		copy;
 		struct nfsd4_offload_status	offload_status;
+		struct nfsd4_copy_notify	copy_notify;
 		struct nfsd4_seek		seek;
 	} u;
 	struct nfs4_replay *			replay;