[v6,07/10] NFSD create new stateid for async copy
diff mbox

Message ID 20171024174752.74910-8-kolga@netapp.com
State New
Headers show

Commit Message

Olga Kornievskaia Oct. 24, 2017, 5:47 p.m. UTC
Generate a new stateid to be used for reply to the asynchronous
COPY (this would also be used later by COPY_NOTIFY as well).
Associate the stateid with the parent OPEN/LOCK/DELEG stateid
that can be freed during the free of the parent stateid. However,
right now deciding to bind the lifetime to when the vfs copy
is done. This way don't need to keep the nfsd_net structure for
the callback. The drawback is that time copy state information
is available for query by OFFLOAD_STATUS is slightly less.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfsd/netns.h     |  8 +++++++
 fs/nfsd/nfs4proc.c  | 32 +++++++++++++++++++-------
 fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfsctl.c    |  1 +
 fs/nfsd/state.h     | 14 ++++++++++++
 fs/nfsd/xdr4.h      |  2 ++
 6 files changed, 115 insertions(+), 8 deletions(-)

Comments

J. Bruce Fields Jan. 26, 2018, 9:37 p.m. UTC | #1
On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote:
> +/*
> + * Create a unique stateid_t to represent each COPY. Hang the copy
> + * stateids off the OPEN/LOCK/DELEG stateid from the client open
> + * the source file.
> + */
> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
> +					       struct nfs4_stid *p_stid)
> +{
> +	struct nfs4_cp_state *cps;
> +	int new_id;
> +
> +	cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
> +	if (!cps)
> +		return NULL;
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&nn->s2s_cp_lock);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
> +	spin_unlock(&nn->s2s_cp_lock);
> +	idr_preload_end();
> +	if (new_id < 0)
> +		goto out_free;
> +	cps->cp_stateid.si_opaque.so_id = new_id;
> +	cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> +	cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	cps->cp_p_stid = p_stid;
> +	INIT_LIST_HEAD(&cps->cp_list);

The INIT_LIST_HEAD is redundant here, it'll just be overridden by the
list_add below.

> +	list_add(&cps->cp_list, &p_stid->sc_cp_list);

p_stid is a preexisting stateid that's visible to other nfsd threads
too, right?  So in theory another COPY using the same parent stateid
could be running concurrently with this one?

Maybe you could just use p_stid->sc_lock  for this.

Ditto for any other manipulations of that list.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Jan. 26, 2018, 9:59 p.m. UTC | #2
On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote:
> Generate a new stateid to be used for reply to the asynchronous
> COPY (this would also be used later by COPY_NOTIFY as well).
> Associate the stateid with the parent OPEN/LOCK/DELEG stateid
> that can be freed during the free of the parent stateid. However,
> right now deciding to bind the lifetime to when the vfs copy
> is done. This way don't need to keep the nfsd_net structure for
> the callback. The drawback is that time copy state information
> is available for query by OFFLOAD_STATUS is slightly less.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfsd/netns.h     |  8 +++++++
>  fs/nfsd/nfs4proc.c  | 32 +++++++++++++++++++-------
>  fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfsctl.c    |  1 +
>  fs/nfsd/state.h     | 14 ++++++++++++
>  fs/nfsd/xdr4.h      |  2 ++
>  6 files changed, 115 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 3714231..2c88a95 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -119,6 +119,14 @@ struct nfsd_net {
>  	u32 clverifier_counter;
>  
>  	struct svc_serv *nfsd_serv;
> +
> +	/*
> +	 * clientid and stateid data for construction of net unique COPY
> +	 * stateids.
> +	 */
> +	u32		s2s_cp_cl_id;
> +	struct idr	s2s_cp_stateids;
> +	spinlock_t	s2s_cp_lock;
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index a020533..6876080 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  static __be32
>  nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		  stateid_t *src_stateid, struct file **src,
> -		  stateid_t *dst_stateid, struct file **dst)
> +		  stateid_t *dst_stateid, struct file **dst,
> +		  struct nfs4_stid **stid)
>  {
>  	__be32 status;
>  
> @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>  					    dst_stateid, WR_STATE, dst, NULL,
> -					    NULL);
> +					    stid);
>  	if (status) {
>  		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
>  		goto out_put_src;
> @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>  	__be32 status;
>  
>  	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
> -				   &clone->cl_dst_stateid, &dst);
> +				   &clone->cl_dst_stateid, &dst, NULL);
>  	if (status)
>  		goto out;
>  
> @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
>  
>  static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
>  {
> -	memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> -		sizeof(copy->cp_dst_stateid));
>  	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
>  	copy->cp_consecutive = 1;
>  	copy->cp_synchronous = sync;
> @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
>  	atomic_inc(&dst->cp_clp->cl_refcount);
>  	dst->fh_dst = get_file(src->fh_dst);
>  	dst->fh_src = get_file(src->fh_src);
> +	dst->stid = src->stid;
> +	dst->cps = src->cps;
>  }
>  
>  static void cleanup_async_copy(struct nfsd4_copy *copy)
>  {
> +	list_del(&copy->cps->cp_list);
> +	nfs4_free_cp_state(copy->cps);
> +	nfs4_put_stid(copy->stid);
>  	fput(copy->fh_dst);
>  	fput(copy->fh_src);
>  	spin_lock(&copy->cp_clp->async_lock);
> @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data)
>  
>  	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
>  				   &copy->fh_src, &copy->cp_dst_stateid,
> -				   &copy->fh_dst);
> +				   &copy->fh_dst, &copy->stid);
>  	if (status)
>  		goto out;
>  
> @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data)
>  		sizeof(struct knfsd_fh));
>  	copy->net = SVC_NET(rqstp);
>  	if (!copy->cp_synchronous) {
> +		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +
>  		status = nfsd4_init_copy_res(copy, 0);
>  		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>  		if (!async_copy) {
>  			status = nfserrno(-ENOMEM);
>  			goto out;
>  		}
> +		copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid);
> +		if (!copy->cps) {
> +			status = nfserrno(-ENOMEM);
> +			kfree(async_copy);
> +			goto out;
> +		}
> +		/* take a reference on the parent stateid so it's not
> +		 * not freed by the copy compound
> +		 */
> +		atomic_inc(&copy->stid->sc_count);
> +		memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
> +			sizeof(copy->cps->cp_stateid));
>  		dup_copy_fields(copy, async_copy);
> -		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
> -			sizeof(copy->cp_dst_stateid));
>  		spin_lock(&async_copy->cp_clp->async_lock);
>  		list_add(&async_copy->copies,
>  				&async_copy->cp_clp->async_copies);
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index af40762..f9151f2 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  	/* Will be incremented before return to client: */
>  	atomic_set(&stid->sc_count, 1);
>  	spin_lock_init(&stid->sc_lock);
> +	INIT_LIST_HEAD(&stid->sc_cp_list);
>  
>  	/*
>  	 * It shouldn't be a problem to reuse an opaque stateid value.
> @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  	return NULL;
>  }
>  
> +/*
> + * Create a unique stateid_t to represent each COPY. Hang the copy
> + * stateids off the OPEN/LOCK/DELEG stateid from the client open
> + * the source file.
> + */
> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
> +					       struct nfs4_stid *p_stid)
> +{
> +	struct nfs4_cp_state *cps;
> +	int new_id;
> +
> +	cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
> +	if (!cps)
> +		return NULL;
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&nn->s2s_cp_lock);
> +	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
> +	spin_unlock(&nn->s2s_cp_lock);
> +	idr_preload_end();
> +	if (new_id < 0)
> +		goto out_free;
> +	cps->cp_stateid.si_opaque.so_id = new_id;
> +	cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> +	cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +	cps->cp_p_stid = p_stid;
> +	INIT_LIST_HEAD(&cps->cp_list);
> +	list_add(&cps->cp_list, &p_stid->sc_cp_list);
> +
> +	return cps;
> +out_free:
> +	kfree(cps);
> +	return NULL;
> +}
> +
> +void nfs4_free_cp_state(struct nfs4_cp_state *cps)
> +{
> +	struct nfsd_net *nn;
> +
> +	nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id);
> +	spin_lock(&nn->s2s_cp_lock);
> +	idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id);
> +	spin_unlock(&nn->s2s_cp_lock);
> +
> +	kfree(cps);
> +}
> +
> +static void nfs4_free_cp_statelist(struct nfs4_stid *stid)
> +{
> +	struct nfs4_cp_state *cps;
> +
> +	might_sleep();
> +
> +	while (!list_empty(&stid->sc_cp_list)) {
> +		cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state,
> +				       cp_list);
> +		list_del(&cps->cp_list);
> +		nfs4_free_cp_state(cps);
> +	}
> +}
> +
>  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
>  {
>  	struct nfs4_stid *stid;
> @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh)
>  	}
>  	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>  	spin_unlock(&clp->cl_lock);
> +
> +	nfs4_free_cp_statelist(s);

Is this really safe if there are still ongoing copies?

I think the copies take a reference count on the parent stateid, so I
think we should never actually get here: perhaps this could just be a
WARN_ON(!list_empty(&s->s_cp_list)).  Though nfsd4_copy puts things on
this list before bumping the sc_count, so there may be a race there.

What is supposed to happen, by the way, if a close arrives for a stateid
with a copy in progress?  Do we cancel the copy before returning from
the close, or do we fail the close?  (Same question for unlock and
delegation return, I guess.)

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia Feb. 2, 2018, 8:45 p.m. UTC | #3
On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote:
>> Generate a new stateid to be used for reply to the asynchronous
>> COPY (this would also be used later by COPY_NOTIFY as well).
>> Associate the stateid with the parent OPEN/LOCK/DELEG stateid
>> that can be freed during the free of the parent stateid. However,
>> right now deciding to bind the lifetime to when the vfs copy
>> is done. This way don't need to keep the nfsd_net structure for
>> the callback. The drawback is that time copy state information
>> is available for query by OFFLOAD_STATUS is slightly less.
>>
>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>> ---
>>  fs/nfsd/netns.h     |  8 +++++++
>>  fs/nfsd/nfs4proc.c  | 32 +++++++++++++++++++-------
>>  fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfsd/nfsctl.c    |  1 +
>>  fs/nfsd/state.h     | 14 ++++++++++++
>>  fs/nfsd/xdr4.h      |  2 ++
>>  6 files changed, 115 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index 3714231..2c88a95 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -119,6 +119,14 @@ struct nfsd_net {
>>       u32 clverifier_counter;
>>
>>       struct svc_serv *nfsd_serv;
>> +
>> +     /*
>> +      * clientid and stateid data for construction of net unique COPY
>> +      * stateids.
>> +      */
>> +     u32             s2s_cp_cl_id;
>> +     struct idr      s2s_cp_stateids;
>> +     spinlock_t      s2s_cp_lock;
>>  };
>>
>>  /* Simple check to find out if a given net was properly initialized */
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index a020533..6876080 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>>  static __be32
>>  nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>                 stateid_t *src_stateid, struct file **src,
>> -               stateid_t *dst_stateid, struct file **dst)
>> +               stateid_t *dst_stateid, struct file **dst,
>> +               struct nfs4_stid **stid)
>>  {
>>       __be32 status;
>>
>> @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>>
>>       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>>                                           dst_stateid, WR_STATE, dst, NULL,
>> -                                         NULL);
>> +                                         stid);
>>       if (status) {
>>               dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
>>               goto out_put_src;
>> @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
>>       __be32 status;
>>
>>       status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
>> -                                &clone->cl_dst_stateid, &dst);
>> +                                &clone->cl_dst_stateid, &dst, NULL);
>>       if (status)
>>               goto out;
>>
>> @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
>>
>>  static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
>>  {
>> -     memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> -             sizeof(copy->cp_dst_stateid));
>>       copy->cp_res.wr_stable_how = NFS_UNSTABLE;
>>       copy->cp_consecutive = 1;
>>       copy->cp_synchronous = sync;
>> @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
>>       atomic_inc(&dst->cp_clp->cl_refcount);
>>       dst->fh_dst = get_file(src->fh_dst);
>>       dst->fh_src = get_file(src->fh_src);
>> +     dst->stid = src->stid;
>> +     dst->cps = src->cps;
>>  }
>>
>>  static void cleanup_async_copy(struct nfsd4_copy *copy)
>>  {
>> +     list_del(&copy->cps->cp_list);
>> +     nfs4_free_cp_state(copy->cps);
>> +     nfs4_put_stid(copy->stid);
>>       fput(copy->fh_dst);
>>       fput(copy->fh_src);
>>       spin_lock(&copy->cp_clp->async_lock);
>> @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data)
>>
>>       status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
>>                                  &copy->fh_src, &copy->cp_dst_stateid,
>> -                                &copy->fh_dst);
>> +                                &copy->fh_dst, &copy->stid);
>>       if (status)
>>               goto out;
>>
>> @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data)
>>               sizeof(struct knfsd_fh));
>>       copy->net = SVC_NET(rqstp);
>>       if (!copy->cp_synchronous) {
>> +             struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> +
>>               status = nfsd4_init_copy_res(copy, 0);
>>               async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>>               if (!async_copy) {
>>                       status = nfserrno(-ENOMEM);
>>                       goto out;
>>               }
>> +             copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid);
>> +             if (!copy->cps) {
>> +                     status = nfserrno(-ENOMEM);
>> +                     kfree(async_copy);
>> +                     goto out;
>> +             }
>> +             /* take a reference on the parent stateid so it's not
>> +              * not freed by the copy compound
>> +              */
>> +             atomic_inc(&copy->stid->sc_count);
>> +             memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
>> +                     sizeof(copy->cps->cp_stateid));
>>               dup_copy_fields(copy, async_copy);
>> -             memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
>> -                     sizeof(copy->cp_dst_stateid));
>>               spin_lock(&async_copy->cp_clp->async_lock);
>>               list_add(&async_copy->copies,
>>                               &async_copy->cp_clp->async_copies);
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index af40762..f9151f2 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>>       /* Will be incremented before return to client: */
>>       atomic_set(&stid->sc_count, 1);
>>       spin_lock_init(&stid->sc_lock);
>> +     INIT_LIST_HEAD(&stid->sc_cp_list);
>>
>>       /*
>>        * It shouldn't be a problem to reuse an opaque stateid value.
>> @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>>       return NULL;
>>  }
>>
>> +/*
>> + * Create a unique stateid_t to represent each COPY. Hang the copy
>> + * stateids off the OPEN/LOCK/DELEG stateid from the client open
>> + * the source file.
>> + */
>> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
>> +                                            struct nfs4_stid *p_stid)
>> +{
>> +     struct nfs4_cp_state *cps;
>> +     int new_id;
>> +
>> +     cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
>> +     if (!cps)
>> +             return NULL;
>> +     idr_preload(GFP_KERNEL);
>> +     spin_lock(&nn->s2s_cp_lock);
>> +     new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
>> +     spin_unlock(&nn->s2s_cp_lock);
>> +     idr_preload_end();
>> +     if (new_id < 0)
>> +             goto out_free;
>> +     cps->cp_stateid.si_opaque.so_id = new_id;
>> +     cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
>> +     cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
>> +     cps->cp_p_stid = p_stid;
>> +     INIT_LIST_HEAD(&cps->cp_list);
>> +     list_add(&cps->cp_list, &p_stid->sc_cp_list);
>> +
>> +     return cps;
>> +out_free:
>> +     kfree(cps);
>> +     return NULL;
>> +}
>> +
>> +void nfs4_free_cp_state(struct nfs4_cp_state *cps)
>> +{
>> +     struct nfsd_net *nn;
>> +
>> +     nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id);
>> +     spin_lock(&nn->s2s_cp_lock);
>> +     idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id);
>> +     spin_unlock(&nn->s2s_cp_lock);
>> +
>> +     kfree(cps);
>> +}
>> +
>> +static void nfs4_free_cp_statelist(struct nfs4_stid *stid)
>> +{
>> +     struct nfs4_cp_state *cps;
>> +
>> +     might_sleep();
>> +
>> +     while (!list_empty(&stid->sc_cp_list)) {
>> +             cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state,
>> +                                    cp_list);
>> +             list_del(&cps->cp_list);
>> +             nfs4_free_cp_state(cps);
>> +     }
>> +}
>> +
>>  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
>>  {
>>       struct nfs4_stid *stid;
>> @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh)
>>       }
>>       idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>>       spin_unlock(&clp->cl_lock);
>> +
>> +     nfs4_free_cp_statelist(s);
>
> Is this really safe if there are still ongoing copies?

You are right. It is not.

> I think the copies take a reference count on the parent stateid, so I
> think we should never actually get here: perhaps this could just be a
> WARN_ON(!list_empty(&s->s_cp_list)).  Though nfsd4_copy puts things on
> this list before bumping the sc_count, so there may be a race there.
>
> What is supposed to happen, by the way, if a close arrives for a stateid
> with a copy in progress?  Do we cancel the copy before returning from
> the close, or do we fail the close?  (Same question for unlock and
> delegation return, I guess.)

Good questions. I think a normal client shouldn't send CLOSE/UNLOCK
while there is an on-going copy. I could see how a DELEGRETURN could
be coming in the middle of the copy (because the client received a
CB_RECALL).

For an easy solution my proposal is to do the following. Upon
receiving close, unlock, delegreturn, if the copy list is non-empty,
then we send an ERR_DELAY back until the copy is done.

Another proposal is to kill the copy thread but that seems too drastic
(specially in the case of a delegreturn). Right now when the copy
thread is killed, it does not send a reply back to the client (as it
assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply).
I could change that to send a reply back. In case the copy used a
delegation stateid, then the next copy would choose a different
stateid. This could be done for the delegation stateid and ERR_DELAY
for the close/unlock.

Is either one acceptable and if so which one sounds better to you?


>
> --b.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Feb. 2, 2018, 9:45 p.m. UTC | #4
On Fri, Feb 02, 2018 at 03:45:28PM -0500, Olga Kornievskaia wrote:
> On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > What is supposed to happen, by the way, if a close arrives for a stateid
> > with a copy in progress?  Do we cancel the copy before returning from
> > the close, or do we fail the close?  (Same question for unlock and
> > delegation return, I guess.)
> 
> Good questions. I think a normal client shouldn't send CLOSE/UNLOCK
> while there is an on-going copy. I could see how a DELEGRETURN could
> be coming in the middle of the copy (because the client received a
> CB_RECALL).
> 
> For an easy solution my proposal is to do the following. Upon
> receiving close, unlock, delegreturn, if the copy list is non-empty,
> then we send an ERR_DELAY back until the copy is done.
> 
> Another proposal is to kill the copy thread but that seems too drastic
> (specially in the case of a delegreturn). Right now when the copy
> thread is killed, it does not send a reply back to the client (as it
> assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply).
> I could change that to send a reply back. In case the copy used a
> delegation stateid, then the next copy would choose a different
> stateid. This could be done for the delegation stateid and ERR_DELAY
> for the close/unlock.
> 
> Is either one acceptable and if so which one sounds better to you?

If we're confident this is something that only a misbehaving client
would do, then I'm OK with choosing whatever's simplest.  I guess we
should double-check with the spec to make sure this case hasn't already
been covered there.  Returning an error on the conflicting operation
sounds good to me.

Is it really OK to do a COPY with a delegation stateid?  That sounds
messy if the delegation is revoked partway through the copy.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia Feb. 15, 2018, 10:18 p.m. UTC | #5
On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Fri, Feb 02, 2018 at 03:45:28PM -0500, Olga Kornievskaia wrote:
>> On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
>> > What is supposed to happen, by the way, if a close arrives for a stateid
>> > with a copy in progress?  Do we cancel the copy before returning from
>> > the close, or do we fail the close?  (Same question for unlock and
>> > delegation return, I guess.)
>>
>> Good questions. I think a normal client shouldn't send CLOSE/UNLOCK
>> while there is an on-going copy. I could see how a DELEGRETURN could
>> be coming in the middle of the copy (because the client received a
>> CB_RECALL).
>>
>> For an easy solution my proposal is to do the following. Upon
>> receiving close, unlock, delegreturn, if the copy list is non-empty,
>> then we send an ERR_DELAY back until the copy is done.
>>
>> Another proposal is to kill the copy thread but that seems too drastic
>> (specially in the case of a delegreturn). Right now when the copy
>> thread is killed, it does not send a reply back to the client (as it
>> assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply).
>> I could change that to send a reply back. In case the copy used a
>> delegation stateid, then the next copy would choose a different
>> stateid. This could be done for the delegation stateid and ERR_DELAY
>> for the close/unlock.
>>
>> Is either one acceptable and if so which one sounds better to you?
>
> If we're confident this is something that only a misbehaving client
> would do, then I'm OK with choosing whatever's simplest.  I guess we
> should double-check with the spec to make sure this case hasn't already
> been covered there.  Returning an error on the conflicting operation
> sounds good to me.
>
> Is it really OK to do a COPY with a delegation stateid?  That sounds
> messy if the delegation is revoked partway through the copy.

Yes, according to the spec, it is ok to use any of the stateids.

RFC 7862 15.2.3
The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
   operation and follows the rules for stateids in Sections 8.2.5 and
   18.32.3 of [RFC5661].... while for an intra-server copy
ca_src_stateid MUST refer
   to a stateid that is valid for a READ operation and follows the rules
   for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].

8.2.5 is the section that says in the decreasing priority to choose
delegation stateid, locking stateid, open stateid.

Trying to deal with the delegation stateid seems to be rather complex.
Here's what I could do (which I mentioned before already):
1. possibly change the client to never use a delegation stateid.
2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
on-going copies. Change the nfsd copy to send a reply if it was killed
with a signal. What I'm not sure if when it's killed I get any partial
bytes from the VFS, I don't think so. I think in that case, the only
thing that I might reply with is a 0bytes copy. And whatever client
that did sent a copy with a delegation stateid would deal with
restarting it with a different stateid.

Having said all that, I'd like to ask why do you think handling
CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
is currently there for the synchronous copy. Right now if the server
receives those operations it does nothing to the on-going synchronous
copy.

As you noted, the async copy takes a reference on the parent stateid
so the state won't go away while the async copy is going on. Why not
keep async copy same as the synchronous one?
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Feb. 16, 2018, 1:43 a.m. UTC | #6
On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> > Is it really OK to do a COPY with a delegation stateid?  That sounds
> > messy if the delegation is revoked partway through the copy.
> 
> Yes, according to the spec, it is ok to use any of the stateids.
> 
> RFC 7862 15.2.3
> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
>    operation and follows the rules for stateids in Sections 8.2.5 and
>    18.32.3 of [RFC5661].... while for an intra-server copy
> ca_src_stateid MUST refer
>    to a stateid that is valid for a READ operation and follows the rules
>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
> 
> 8.2.5 is the section that says in the decreasing priority to choose
> delegation stateid, locking stateid, open stateid.
> 
> Trying to deal with the delegation stateid seems to be rather complex.
> Here's what I could do (which I mentioned before already):
> 1. possibly change the client to never use a delegation stateid.
> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
> on-going copies. Change the nfsd copy to send a reply if it was killed
> with a signal. What I'm not sure if when it's killed I get any partial
> bytes from the VFS, I don't think so. I think in that case, the only
> thing that I might reply with is a 0bytes copy. And whatever client
> that did sent a copy with a delegation stateid would deal with
> restarting it with a different stateid.
> 
> Having said all that, I'd like to ask why do you think handling
> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
> is currently there for the synchronous copy. Right now if the server
> receives those operations it does nothing to the on-going synchronous
> copy.

Or for that matter, why should it be different from READ and
WRITE--looks like those can also continue executing after a close, too.
So, good point.

> As you noted, the async copy takes a reference on the parent stateid
> so the state won't go away while the async copy is going on. Why not
> keep async copy same as the synchronous one?

OK, so maybe it's not such a big deal.

I'm still confused about the delegation case, though: a copy can take a
very long time, so we can't just wait for the copy to end before
revoking the delegation.  So do we allow the copy to continue
indefinitely even after the delegation stateid it was initiated with has
long ago been revoked?

Maybe it's not a problem.  I guess I'm having trouble remembering why
copies (or IO in general) uses stateids in the first place....

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia Feb. 16, 2018, 4:06 p.m. UTC | #7
On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
>> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> > Is it really OK to do a COPY with a delegation stateid?  That sounds
>> > messy if the delegation is revoked partway through the copy.
>>
>> Yes, according to the spec, it is ok to use any of the stateids.
>>
>> RFC 7862 15.2.3
>> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
>>    operation and follows the rules for stateids in Sections 8.2.5 and
>>    18.32.3 of [RFC5661].... while for an intra-server copy
>> ca_src_stateid MUST refer
>>    to a stateid that is valid for a READ operation and follows the rules
>>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
>>
>> 8.2.5 is the section that says in the decreasing priority to choose
>> delegation stateid, locking stateid, open stateid.
>>
>> Trying to deal with the delegation stateid seems to be rather complex.
>> Here's what I could do (which I mentioned before already):
>> 1. possibly change the client to never use a delegation stateid.
>> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
>> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
>> on-going copies. Change the nfsd copy to send a reply if it was killed
>> with a signal. What I'm not sure if when it's killed I get any partial
>> bytes from the VFS, I don't think so. I think in that case, the only
>> thing that I might reply with is a 0bytes copy. And whatever client
>> that did sent a copy with a delegation stateid would deal with
>> restarting it with a different stateid.
>>
>> Having said all that, I'd like to ask why do you think handling
>> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
>> is currently there for the synchronous copy. Right now if the server
>> receives those operations it does nothing to the on-going synchronous
>> copy.
>
> Or for that matter, why should it be different from READ and
> WRITE--looks like those can also continue executing after a close, too.
> So, good point.
>
>> As you noted, the async copy takes a reference on the parent stateid
>> so the state won't go away while the async copy is going on. Why not
>> keep async copy same as the synchronous one?
>
> OK, so maybe it's not such a big deal.
>
> I'm still confused about the delegation case, though: a copy can take a
> very long time, so we can't just wait for the copy to end before
> revoking the delegation.  So do we allow the copy to continue
> indefinitely even after the delegation stateid it was initiated with has
> long ago been revoked?
>
> Maybe it's not a problem.  I guess I'm having trouble remembering why
> copies (or IO in general) uses stateids in the first place....

Sigh. No you are right, we shouldn't let it run long past the
revocation. With IO operations, once it starts an operation like
CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because
IO isn't long, and once it's done the client will notice a change in
stateid and will choose a different stateid to do the next IO
operation. I guess that's why a synchronous copy is still bordering ok
because it's relatively short and the next copy will choose a
different stateid.

Unless there are any big objections, I'll make the following changes
1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY.
2. change the copy code to once received a signal to return the
partial copy back.
3. DELEGRETURN will stop/kill any on-going copies, which would trigger
a partial copy reply and the next copy should choose a different
stateid.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Feb. 16, 2018, 6:12 p.m. UTC | #8
On Fri, Feb 16, 2018 at 11:06:07AM -0500, Olga Kornievskaia wrote:
> On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
> >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> >> > Is it really OK to do a COPY with a delegation stateid?  That sounds
> >> > messy if the delegation is revoked partway through the copy.
> >>
> >> Yes, according to the spec, it is ok to use any of the stateids.
> >>
> >> RFC 7862 15.2.3
> >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
> >>    operation and follows the rules for stateids in Sections 8.2.5 and
> >>    18.32.3 of [RFC5661].... while for an intra-server copy
> >> ca_src_stateid MUST refer
> >>    to a stateid that is valid for a READ operation and follows the rules
> >>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
> >>
> >> 8.2.5 is the section that says in the decreasing priority to choose
> >> delegation stateid, locking stateid, open stateid.
> >>
> >> Trying to deal with the delegation stateid seems to be rather complex.
> >> Here's what I could do (which I mentioned before already):
> >> 1. possibly change the client to never use a delegation stateid.
> >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
> >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
> >> on-going copies. Change the nfsd copy to send a reply if it was killed
> >> with a signal. What I'm not sure if when it's killed I get any partial
> >> bytes from the VFS, I don't think so. I think in that case, the only
> >> thing that I might reply with is a 0bytes copy. And whatever client
> >> that did sent a copy with a delegation stateid would deal with
> >> restarting it with a different stateid.
> >>
> >> Having said all that, I'd like to ask why do you think handling
> >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
> >> is currently there for the synchronous copy. Right now if the server
> >> receives those operations it does nothing to the on-going synchronous
> >> copy.
> >
> > Or for that matter, why should it be different from READ and
> > WRITE--looks like those can also continue executing after a close, too.
> > So, good point.
> >
> >> As you noted, the async copy takes a reference on the parent stateid
> >> so the state won't go away while the async copy is going on. Why not
> >> keep async copy same as the synchronous one?
> >
> > OK, so maybe it's not such a big deal.
> >
> > I'm still confused about the delegation case, though: a copy can take a
> > very long time, so we can't just wait for the copy to end before
> > revoking the delegation.  So do we allow the copy to continue
> > indefinitely even after the delegation stateid it was initiated with has
> > long ago been revoked?
> >
> > Maybe it's not a problem.  I guess I'm having trouble remembering why
> > copies (or IO in general) uses stateids in the first place....
> 
> Sigh. No you are right, we shouldn't let it run long past the
> revocation.

I'm worried by it running long past the revocation, but I'm also not
managing to come with a case where it causes an actual problem.

--b.

> With IO operations, once it starts an operation like
> CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because
> IO isn't long, and once it's done the client will notice a change in
> stateid and will choose a different stateid to do the next IO
> operation. I guess that's why a synchronous copy is still bordering ok
> because it's relatively short and the next copy will choose a
> different stateid.
> 
> Unless there are any big objections, I'll make the following changes
> 1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY.
> 2. change the copy code to once received a signal to return the
> partial copy back.
> 3. DELEGRETURN will stop/kill any on-going copies, which would trigger
> a partial copy reply and the next copy should choose a different
> stateid.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia Feb. 16, 2018, 8:53 p.m. UTC | #9
On Fri, Feb 16, 2018 at 1:12 PM, J. Bruce Fields <bfields@redhat.com> wrote:
> On Fri, Feb 16, 2018 at 11:06:07AM -0500, Olga Kornievskaia wrote:
>> On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote:
>> >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote:
>> >> > Is it really OK to do a COPY with a delegation stateid?  That sounds
>> >> > messy if the delegation is revoked partway through the copy.
>> >>
>> >> Yes, according to the spec, it is ok to use any of the stateids.
>> >>
>> >> RFC 7862 15.2.3
>> >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE
>> >>    operation and follows the rules for stateids in Sections 8.2.5 and
>> >>    18.32.3 of [RFC5661].... while for an intra-server copy
>> >> ca_src_stateid MUST refer
>> >>    to a stateid that is valid for a READ operation and follows the rules
>> >>    for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661].
>> >>
>> >> 8.2.5 is the section that says in the decreasing priority to choose
>> >> delegation stateid, locking stateid, open stateid.
>> >>
>> >> Trying to deal with the delegation stateid seems to be rather complex.
>> >> Here's what I could do (which I mentioned before already):
>> >> 1. possibly change the client to never use a delegation stateid.
>> >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty.
>> >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any
>> >> on-going copies. Change the nfsd copy to send a reply if it was killed
>> >> with a signal. What I'm not sure if when it's killed I get any partial
>> >> bytes from the VFS, I don't think so. I think in that case, the only
>> >> thing that I might reply with is a 0bytes copy. And whatever client
>> >> that did sent a copy with a delegation stateid would deal with
>> >> restarting it with a different stateid.
>> >>
>> >> Having said all that, I'd like to ask why do you think handling
>> >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what
>> >> is currently there for the synchronous copy. Right now if the server
>> >> receives those operations it does nothing to the on-going synchronous
>> >> copy.
>> >
>> > Or for that matter, why should it be different from READ and
>> > WRITE--looks like those can also continue executing after a close, too.
>> > So, good point.
>> >
>> >> As you noted, the async copy takes a reference on the parent stateid
>> >> so the state won't go away while the async copy is going on. Why not
>> >> keep async copy same as the synchronous one?
>> >
>> > OK, so maybe it's not such a big deal.
>> >
>> > I'm still confused about the delegation case, though: a copy can take a
>> > very long time, so we can't just wait for the copy to end before
>> > revoking the delegation.  So do we allow the copy to continue
>> > indefinitely even after the delegation stateid it was initiated with has
>> > long ago been revoked?
>> >
>> > Maybe it's not a problem.  I guess I'm having trouble remembering why
>> > copies (or IO in general) uses stateids in the first place....
>>
>> Sigh. No you are right, we shouldn't let it run long past the
>> revocation.
>
> I'm worried by it running long past the revocation, but I'm also not
> managing to come with a case where it causes an actual problem.

It seems wrong but I'm not sure exactly why. I thinking what if the
server receives all state closing operations (delegreturn, unlock,
close) and the copy is still going. A file is being changed after the
file is closed seem weird. But as I mentioned, properly working client
should be unlocking/closing the file until after the copy is done (or
until offload_cancel is sent after ctrl-c).

Who is responsible for making sure that the file isn't being accessed
after it's closed? Is it server's or client's? Like client is making
sure all the writes are done before it does the close. Should it be
client's responsibility as well to make sure there are no on-going
copies before doing the close?

Is the question here about just delegations or are you questioning
whether or not CLOSE/UNLOCK need to be delayed until the copy is
finished?


> --b.
>
>> With IO operations, once it starts an operation like
>> CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because
>> IO isn't long, and once it's done the client will notice a change in
>> stateid and will choose a different stateid to do the next IO
>> operation. I guess that's why a synchronous copy is still bordering ok
>> because it's relatively short and the next copy will choose a
>> different stateid.
>>
>> Unless there are any big objections, I'll make the following changes
>> 1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY.
>> 2. change the copy code to once received a signal to return the
>> partial copy back.
>> 3. DELEGRETURN will stop/kill any on-going copies, which would trigger
>> a partial copy reply and the next copy should choose a different
>> stateid.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields Feb. 20, 2018, 6:48 p.m. UTC | #10
On Fri, Feb 16, 2018 at 03:53:05PM -0500, Olga Kornievskaia wrote:
> It seems wrong but I'm not sure exactly why. I thinking what if the
> server receives all state closing operations (delegreturn, unlock,
> close) and the copy is still going. A file is being changed after the
> file is closed seem weird. But as I mentioned, properly working client
> should be unlocking/closing the file until after the copy is done (or
> until offload_cancel is sent after ctrl-c).
> 
> Who is responsible for making sure that the file isn't being accessed
> after it's closed? Is it server's or client's? Like client is making
> sure all the writes are done before it does the close. Should it be
> client's responsibility as well to make sure there are no on-going
> copies before doing the close?
> 
> Is the question here about just delegations or are you questioning
> whether or not CLOSE/UNLOCK need to be delayed until the copy is
> finished?

Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too.

I don't know if the client really causes trouble for anyone but itself
if it allows IO to go on past close, unlock, or delegreturn.

If it's still going on after another client acquires a conflicting open,
lock, or delegation, that could be a problem: a client that holds a
mandatory lock, or an open with DENY_WRITE, or a delegation, has a right
not to expect the file to change underneath it.  We should already be
safe against that in the delegation case thanks to the vfs checks in
fs/locks.c:check_conflicting_open().

In the DENY_WRITE case I bet there's still bug.  Maybe in the mandatory
lock case, too.  But those cases are rare and already have other
problems.

So, I guess I don't care too much unless someone's seeing another
problem.

Still, killing off a long-running copy is probably a good precaution?

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olga Kornievskaia March 6, 2018, 5:15 p.m. UTC | #11
On Tue, Feb 20, 2018 at 1:48 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Fri, Feb 16, 2018 at 03:53:05PM -0500, Olga Kornievskaia wrote:
>> It seems wrong but I'm not sure exactly why. I thinking what if the
>> server receives all state closing operations (delegreturn, unlock,
>> close) and the copy is still going. A file is being changed after the
>> file is closed seem weird. But as I mentioned, properly working client
>> should be unlocking/closing the file until after the copy is done (or
>> until offload_cancel is sent after ctrl-c).
>>
>> Who is responsible for making sure that the file isn't being accessed
>> after it's closed? Is it server's or client's? Like client is making
>> sure all the writes are done before it does the close. Should it be
>> client's responsibility as well to make sure there are no on-going
>> copies before doing the close?
>>
>> Is the question here about just delegations or are you questioning
>> whether or not CLOSE/UNLOCK need to be delayed until the copy is
>> finished?
>
> Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too.
>
> I don't know if the client really causes trouble for anyone but itself
> if it allows IO to go on past close, unlock, or delegreturn.
>
> If it's still going on after another client acquires a conflicting open,
> lock, or delegation, that could be a problem: a client that holds a
> mandatory lock, or an open with DENY_WRITE, or a delegation, has a right
> not to expect the file to change underneath it.  We should already be
> safe against that in the delegation case thanks to the vfs checks in
> fs/locks.c:check_conflicting_open().
>
> In the DENY_WRITE case I bet there's still bug.  Maybe in the mandatory
> lock case, too.  But those cases are rare and already have other
> problems.
>
> So, I guess I don't care too much unless someone's seeing another
> problem.
>
> Still, killing off a long-running copy is probably a good precaution?

Now that I got back into the code and remembered it again, copy
stateid is never associated with a delegation stateid. Copy is
associated with the destination file's stateid which is opened for
write. Since linux server does not give out write delegations, then
the stateid is either an open stateid or lock stateid (in my testing I
varied not locking and locking the destination file so to get open
stateid as well as lock stateid to be used in the copy). If you want
me to keep the code for killing off copy in the delegreturn because in
the future there might be write delegations, I can but I recall the
rule of thumb is to keep it simple, thus I'm inclined to remove it.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields March 6, 2018, 7:33 p.m. UTC | #12
On Tue, Mar 06, 2018 at 12:15:59PM -0500, Olga Kornievskaia wrote:
> On Tue, Feb 20, 2018 at 1:48 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too.
> >
> > I don't know if the client really causes trouble for anyone but itself
> > if it allows IO to go on past close, unlock, or delegreturn.
> >
> > If it's still going on after another client acquires a conflicting open,
> > lock, or delegation, that could be a problem: a client that holds a
> > mandatory lock, or an open with DENY_WRITE, or a delegation, has a right
> > not to expect the file to change underneath it.  We should already be
> > safe against that in the delegation case thanks to the vfs checks in
> > fs/locks.c:check_conflicting_open().
> >
> > In the DENY_WRITE case I bet there's still bug.  Maybe in the mandatory
> > lock case, too.  But those cases are rare and already have other
> > problems.
> >
> > So, I guess I don't care too much unless someone's seeing another
> > problem.
> >
> > Still, killing off a long-running copy is probably a good precaution?
> 
> Now that I got back into the code and remembered it again, copy
> stateid is never associated with a delegation stateid. Copy is
> associated with the destination file's stateid which is opened for
> write. Since linux server does not give out write delegations, then
> the stateid is either an open stateid or lock stateid (in my testing I
> varied not locking and locking the destination file so to get open
> stateid as well as lock stateid to be used in the copy). If you want
> me to keep the code for killing off copy in the delegreturn because in
> the future there might be write delegations, I can but I recall the
> rule of thumb is to keep it simple, thus I'm inclined to remove it.

I'm hoping to get to write delegations soon....  And you can actually
write while holding a read delegation.  (Currently knfsd breaks the
delegation in that case, but I'm fixing that.)

That said, I still haven't convinced myself there's any real issue here.
So, I'm fine with leaving that out.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3714231..2c88a95 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -119,6 +119,14 @@  struct nfsd_net {
 	u32 clverifier_counter;
 
 	struct svc_serv *nfsd_serv;
+
+	/*
+	 * clientid and stateid data for construction of net unique COPY
+	 * stateids.
+	 */
+	u32		s2s_cp_cl_id;
+	struct idr	s2s_cp_stateids;
+	spinlock_t	s2s_cp_lock;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a020533..6876080 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1039,7 +1039,8 @@  static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		  stateid_t *src_stateid, struct file **src,
-		  stateid_t *dst_stateid, struct file **dst)
+		  stateid_t *dst_stateid, struct file **dst,
+		  struct nfs4_stid **stid)
 {
 	__be32 status;
 
@@ -1053,7 +1054,7 @@  static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    dst_stateid, WR_STATE, dst, NULL,
-					    NULL);
+					    stid);
 	if (status) {
 		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
@@ -1084,7 +1085,7 @@  static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 	__be32 status;
 
 	status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
-				   &clone->cl_dst_stateid, &dst);
+				   &clone->cl_dst_stateid, &dst, NULL);
 	if (status)
 		goto out;
 
@@ -1117,8 +1118,6 @@  static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
 
 static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
 {
-	memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
-		sizeof(copy->cp_dst_stateid));
 	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
 	copy->cp_consecutive = 1;
 	copy->cp_synchronous = sync;
@@ -1180,10 +1179,15 @@  static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 	atomic_inc(&dst->cp_clp->cl_refcount);
 	dst->fh_dst = get_file(src->fh_dst);
 	dst->fh_src = get_file(src->fh_src);
+	dst->stid = src->stid;
+	dst->cps = src->cps;
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
+	list_del(&copy->cps->cp_list);
+	nfs4_free_cp_state(copy->cps);
+	nfs4_put_stid(copy->stid);
 	fput(copy->fh_dst);
 	fput(copy->fh_src);
 	spin_lock(&copy->cp_clp->async_lock);
@@ -1225,7 +1229,7 @@  static int nfsd4_do_async_copy(void *data)
 
 	status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
 				   &copy->fh_src, &copy->cp_dst_stateid,
-				   &copy->fh_dst);
+				   &copy->fh_dst, &copy->stid);
 	if (status)
 		goto out;
 
@@ -1234,15 +1238,27 @@  static int nfsd4_do_async_copy(void *data)
 		sizeof(struct knfsd_fh));
 	copy->net = SVC_NET(rqstp);
 	if (!copy->cp_synchronous) {
+		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
 		status = nfsd4_init_copy_res(copy, 0);
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy) {
 			status = nfserrno(-ENOMEM);
 			goto out;
 		}
+		copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid);
+		if (!copy->cps) {
+			status = nfserrno(-ENOMEM);
+			kfree(async_copy);
+			goto out;
+		}
+		/* take a reference on the parent stateid so it's not
+		 * not freed by the copy compound
+		 */
+		atomic_inc(&copy->stid->sc_count);
+		memcpy(&copy->cp_res.cb_stateid, &copy->cps->cp_stateid,
+			sizeof(copy->cps->cp_stateid));
 		dup_copy_fields(copy, async_copy);
-		memcpy(&copy->cp_res.cb_stateid, &copy->cp_dst_stateid,
-			sizeof(copy->cp_dst_stateid));
 		spin_lock(&async_copy->cp_clp->async_lock);
 		list_add(&async_copy->copies,
 				&async_copy->cp_clp->async_copies);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index af40762..f9151f2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -658,6 +658,7 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	/* Will be incremented before return to client: */
 	atomic_set(&stid->sc_count, 1);
 	spin_lock_init(&stid->sc_lock);
+	INIT_LIST_HEAD(&stid->sc_cp_list);
 
 	/*
 	 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -674,6 +675,66 @@  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
 	return NULL;
 }
 
+/*
+ * Create a unique stateid_t to represent each COPY. Hang the copy
+ * stateids off the OPEN/LOCK/DELEG stateid from the client open
+ * the source file.
+ */
+struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn,
+					       struct nfs4_stid *p_stid)
+{
+	struct nfs4_cp_state *cps;
+	int new_id;
+
+	cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL);
+	if (!cps)
+		return NULL;
+	idr_preload(GFP_KERNEL);
+	spin_lock(&nn->s2s_cp_lock);
+	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT);
+	spin_unlock(&nn->s2s_cp_lock);
+	idr_preload_end();
+	if (new_id < 0)
+		goto out_free;
+	cps->cp_stateid.si_opaque.so_id = new_id;
+	cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
+	cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	cps->cp_p_stid = p_stid;
+	INIT_LIST_HEAD(&cps->cp_list);
+	list_add(&cps->cp_list, &p_stid->sc_cp_list);
+
+	return cps;
+out_free:
+	kfree(cps);
+	return NULL;
+}
+
+void nfs4_free_cp_state(struct nfs4_cp_state *cps)
+{
+	struct nfsd_net *nn;
+
+	nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id);
+	spin_lock(&nn->s2s_cp_lock);
+	idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id);
+	spin_unlock(&nn->s2s_cp_lock);
+
+	kfree(cps);
+}
+
+static void nfs4_free_cp_statelist(struct nfs4_stid *stid)
+{
+	struct nfs4_cp_state *cps;
+
+	might_sleep();
+
+	while (!list_empty(&stid->sc_cp_list)) {
+		cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state,
+				       cp_list);
+		list_del(&cps->cp_list);
+		nfs4_free_cp_state(cps);
+	}
+}
+
 static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
 {
 	struct nfs4_stid *stid;
@@ -819,6 +880,9 @@  static void block_delegations(struct knfsd_fh *fh)
 	}
 	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
 	spin_unlock(&clp->cl_lock);
+
+	nfs4_free_cp_statelist(s);
+
 	s->sc_free(s);
 	if (fp)
 		put_nfs4_file(fp);
@@ -6954,6 +7018,8 @@  static int nfs4_state_create_net(struct net *net)
 	INIT_LIST_HEAD(&nn->close_lru);
 	INIT_LIST_HEAD(&nn->del_recall_lru);
 	spin_lock_init(&nn->client_lock);
+	spin_lock_init(&nn->s2s_cp_lock);
+	idr_init(&nn->s2s_cp_stateids);
 
 	spin_lock_init(&nn->blocked_locks_lock);
 	INIT_LIST_HEAD(&nn->blocked_locks_lru);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6493df6..232270d 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1241,6 +1241,7 @@  static __net_init int nfsd_init_net(struct net *net)
 	nn->nfsd4_grace = 90;
 	nn->clverifier_counter = prandom_u32();
 	nn->clientid_counter = prandom_u32();
+	nn->s2s_cp_cl_id = nn->clientid_counter++;
 	return 0;
 
 out_idmap_error:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d58507b..b4de8ae 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -93,6 +93,7 @@  struct nfs4_stid {
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
 #define NFS4_LAYOUT_STID 64
+	struct list_head	sc_cp_list;
 	unsigned char		sc_type;
 	stateid_t		sc_stateid;
 	spinlock_t		sc_lock;
@@ -102,6 +103,17 @@  struct nfs4_stid {
 };
 
 /*
+ * Keep a list of stateids issued by the COPY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid. Used for lookup by
+ * OFFLOAD_CANCEL and OFFLOAD_STATUS (as well as COPY_NOTIFY)
+ */
+struct nfs4_cp_state {
+	stateid_t		cp_stateid;
+	struct list_head	cp_list;	/* per parent nfs4_stid */
+	struct nfs4_stid        *cp_p_stid;     /* pointer to parent */
+};
+
+/*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
  * returned by the client:
@@ -609,6 +621,8 @@  __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     struct nfs4_stid **s, struct nfsd_net *nn);
 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
 				  void (*sc_free)(struct nfs4_stid *));
+struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, struct nfs4_stid *p_stid);
+void nfs4_free_cp_state(struct nfs4_cp_state *cps);
 void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0a19954..27bac6a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -535,6 +535,8 @@  struct nfsd4_copy {
 	struct file             *fh_src;
 	struct file             *fh_dst;
 	struct net              *net;
+	struct nfs4_stid        *stid;
+	struct nfs4_cp_state    *cps;
 
 	struct list_head	copies;
 	struct task_struct	*copy_task;