Message ID | 20171024174752.74910-8-kolga@netapp.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote: > +/* > + * Create a unique stateid_t to represent each COPY. Hang the copy > + * stateids off the OPEN/LOCK/DELEG stateid from the client open > + * the source file. > + */ > +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, > + struct nfs4_stid *p_stid) > +{ > + struct nfs4_cp_state *cps; > + int new_id; > + > + cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL); > + if (!cps) > + return NULL; > + idr_preload(GFP_KERNEL); > + spin_lock(&nn->s2s_cp_lock); > + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT); > + spin_unlock(&nn->s2s_cp_lock); > + idr_preload_end(); > + if (new_id < 0) > + goto out_free; > + cps->cp_stateid.si_opaque.so_id = new_id; > + cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; > + cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; > + cps->cp_p_stid = p_stid; > + INIT_LIST_HEAD(&cps->cp_list); The INIT_LIST_HEAD is redundant here, it'll just be overridden by the list_add below. > + list_add(&cps->cp_list, &p_stid->sc_cp_list); p_stid is a preexisting stateid that's visible to other nfsd threads too, right? So in theory another COPY using the same parent stateid could be running concurrently with this one? Maybe you could just use p_stid->sc_lock for this. Ditto for any other manipulations of that list. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote: > Generate a new stateid to be used for reply to the asynchronous > COPY (this would also be used later by COPY_NOTIFY as well). > Associate the stateid with the parent OPEN/LOCK/DELEG stateid > that can be freed during the free of the parent stateid. However, > right now deciding to bind the lifetime to when the vfs copy > is done. This way don't need to keep the nfsd_net structure for > the callback. The drawback is that time copy state information > is available for query by OFFLOAD_STATUS is slightly less. > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com> > --- > fs/nfsd/netns.h | 8 +++++++ > fs/nfsd/nfs4proc.c | 32 +++++++++++++++++++------- > fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfsctl.c | 1 + > fs/nfsd/state.h | 14 ++++++++++++ > fs/nfsd/xdr4.h | 2 ++ > 6 files changed, 115 insertions(+), 8 deletions(-) > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > index 3714231..2c88a95 100644 > --- a/fs/nfsd/netns.h > +++ b/fs/nfsd/netns.h > @@ -119,6 +119,14 @@ struct nfsd_net { > u32 clverifier_counter; > > struct svc_serv *nfsd_serv; > + > + /* > + * clientid and stateid data for construction of net unique COPY > + * stateids. 
> + */ > + u32 s2s_cp_cl_id; > + struct idr s2s_cp_stateids; > + spinlock_t s2s_cp_lock; > }; > > /* Simple check to find out if a given net was properly initialized */ > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index a020533..6876080 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) > static __be32 > nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > stateid_t *src_stateid, struct file **src, > - stateid_t *dst_stateid, struct file **dst) > + stateid_t *dst_stateid, struct file **dst, > + struct nfs4_stid **stid) > { > __be32 status; > > @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > dst_stateid, WR_STATE, dst, NULL, > - NULL); > + stid); > if (status) { > dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); > goto out_put_src; > @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) > __be32 status; > > status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src, > - &clone->cl_dst_stateid, &dst); > + &clone->cl_dst_stateid, &dst, NULL); > if (status) > goto out; > > @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, > > static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) > { > - memcpy(©->cp_res.cb_stateid, ©->cp_dst_stateid, > - sizeof(copy->cp_dst_stateid)); > copy->cp_res.wr_stable_how = NFS_UNSTABLE; > copy->cp_consecutive = 1; > copy->cp_synchronous = sync; > @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > atomic_inc(&dst->cp_clp->cl_refcount); > dst->fh_dst = get_file(src->fh_dst); > dst->fh_src = get_file(src->fh_src); > + dst->stid = src->stid; > + dst->cps = src->cps; > } > > static void 
cleanup_async_copy(struct nfsd4_copy *copy) > { > + list_del(©->cps->cp_list); > + nfs4_free_cp_state(copy->cps); > + nfs4_put_stid(copy->stid); > fput(copy->fh_dst); > fput(copy->fh_src); > spin_lock(©->cp_clp->async_lock); > @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data) > > status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, > ©->fh_src, ©->cp_dst_stateid, > - ©->fh_dst); > + ©->fh_dst, ©->stid); > if (status) > goto out; > > @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data) > sizeof(struct knfsd_fh)); > copy->net = SVC_NET(rqstp); > if (!copy->cp_synchronous) { > + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); > + > status = nfsd4_init_copy_res(copy, 0); > async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); > if (!async_copy) { > status = nfserrno(-ENOMEM); > goto out; > } > + copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid); > + if (!copy->cps) { > + status = nfserrno(-ENOMEM); > + kfree(async_copy); > + goto out; > + } > + /* take a reference on the parent stateid so it's not > + * not freed by the copy compound > + */ > + atomic_inc(©->stid->sc_count); > + memcpy(©->cp_res.cb_stateid, ©->cps->cp_stateid, > + sizeof(copy->cps->cp_stateid)); > dup_copy_fields(copy, async_copy); > - memcpy(©->cp_res.cb_stateid, ©->cp_dst_stateid, > - sizeof(copy->cp_dst_stateid)); > spin_lock(&async_copy->cp_clp->async_lock); > list_add(&async_copy->copies, > &async_copy->cp_clp->async_copies); > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index af40762..f9151f2 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla > /* Will be incremented before return to client: */ > atomic_set(&stid->sc_count, 1); > spin_lock_init(&stid->sc_lock); > + INIT_LIST_HEAD(&stid->sc_cp_list); > > /* > * It shouldn't be a problem to reuse an opaque stateid value. 
> @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla > return NULL; > } > > +/* > + * Create a unique stateid_t to represent each COPY. Hang the copy > + * stateids off the OPEN/LOCK/DELEG stateid from the client open > + * the source file. > + */ > +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, > + struct nfs4_stid *p_stid) > +{ > + struct nfs4_cp_state *cps; > + int new_id; > + > + cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL); > + if (!cps) > + return NULL; > + idr_preload(GFP_KERNEL); > + spin_lock(&nn->s2s_cp_lock); > + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT); > + spin_unlock(&nn->s2s_cp_lock); > + idr_preload_end(); > + if (new_id < 0) > + goto out_free; > + cps->cp_stateid.si_opaque.so_id = new_id; > + cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; > + cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; > + cps->cp_p_stid = p_stid; > + INIT_LIST_HEAD(&cps->cp_list); > + list_add(&cps->cp_list, &p_stid->sc_cp_list); > + > + return cps; > +out_free: > + kfree(cps); > + return NULL; > +} > + > +void nfs4_free_cp_state(struct nfs4_cp_state *cps) > +{ > + struct nfsd_net *nn; > + > + nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id); > + spin_lock(&nn->s2s_cp_lock); > + idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id); > + spin_unlock(&nn->s2s_cp_lock); > + > + kfree(cps); > +} > + > +static void nfs4_free_cp_statelist(struct nfs4_stid *stid) > +{ > + struct nfs4_cp_state *cps; > + > + might_sleep(); > + > + while (!list_empty(&stid->sc_cp_list)) { > + cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state, > + cp_list); > + list_del(&cps->cp_list); > + nfs4_free_cp_state(cps); > + } > +} > + > static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) > { > struct nfs4_stid *stid; > @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh) > } > 
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); > spin_unlock(&clp->cl_lock); > + > + nfs4_free_cp_statelist(s); Is this really safe if there are still ongoing copies? I think the copies take a reference count on the parent stateid, so I think we should never actually get here: perhaps this could just be a WARN_ON(!list_empty(&s->sc_cp_list)). Though nfsd4_copy puts things on this list before bumping the sc_count, so there may be a race there. What is supposed to happen, by the way, if a close arrives for a stateid with a copy in progress? Do we cancel the copy before returning from the close, or do we fail the close? (Same question for unlock and delegation return, I guess.) --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote: > On Tue, Oct 24, 2017 at 01:47:49PM -0400, Olga Kornievskaia wrote: >> Generate a new stateid to be used for reply to the asynchronous >> COPY (this would also be used later by COPY_NOTIFY as well). >> Associate the stateid with the parent OPEN/LOCK/DELEG stateid >> that can be freed during the free of the parent stateid. However, >> right now deciding to bind the lifetime to when the vfs copy >> is done. This way don't need to keep the nfsd_net structure for >> the callback. The drawback is that time copy state information >> is available for query by OFFLOAD_STATUS is slightly less. >> >> Signed-off-by: Olga Kornievskaia <kolga@netapp.com> >> --- >> fs/nfsd/netns.h | 8 +++++++ >> fs/nfsd/nfs4proc.c | 32 +++++++++++++++++++------- >> fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfsctl.c | 1 + >> fs/nfsd/state.h | 14 ++++++++++++ >> fs/nfsd/xdr4.h | 2 ++ >> 6 files changed, 115 insertions(+), 8 deletions(-) >> >> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >> index 3714231..2c88a95 100644 >> --- a/fs/nfsd/netns.h >> +++ b/fs/nfsd/netns.h >> @@ -119,6 +119,14 @@ struct nfsd_net { >> u32 clverifier_counter; >> >> struct svc_serv *nfsd_serv; >> + >> + /* >> + * clientid and stateid data for construction of net unique COPY >> + * stateids. 
>> + */ >> + u32 s2s_cp_cl_id; >> + struct idr s2s_cp_stateids; >> + spinlock_t s2s_cp_lock; >> }; >> >> /* Simple check to find out if a given net was properly initialized */ >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index a020533..6876080 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) >> static __be32 >> nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> stateid_t *src_stateid, struct file **src, >> - stateid_t *dst_stateid, struct file **dst) >> + stateid_t *dst_stateid, struct file **dst, >> + struct nfs4_stid **stid) >> { >> __be32 status; >> >> @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) >> >> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >> dst_stateid, WR_STATE, dst, NULL, >> - NULL); >> + stid); >> if (status) { >> dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); >> goto out_put_src; >> @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) >> __be32 status; >> >> status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src, >> - &clone->cl_dst_stateid, &dst); >> + &clone->cl_dst_stateid, &dst, NULL); >> if (status) >> goto out; >> >> @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, >> >> static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) >> { >> - memcpy(©->cp_res.cb_stateid, ©->cp_dst_stateid, >> - sizeof(copy->cp_dst_stateid)); >> copy->cp_res.wr_stable_how = NFS_UNSTABLE; >> copy->cp_consecutive = 1; >> copy->cp_synchronous = sync; >> @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) >> atomic_inc(&dst->cp_clp->cl_refcount); >> dst->fh_dst = get_file(src->fh_dst); >> dst->fh_src = get_file(src->fh_src); >> + dst->stid = src->stid; >> + dst->cps 
= src->cps; >> } >> >> static void cleanup_async_copy(struct nfsd4_copy *copy) >> { >> + list_del(©->cps->cp_list); >> + nfs4_free_cp_state(copy->cps); >> + nfs4_put_stid(copy->stid); >> fput(copy->fh_dst); >> fput(copy->fh_src); >> spin_lock(©->cp_clp->async_lock); >> @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data) >> >> status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, >> ©->fh_src, ©->cp_dst_stateid, >> - ©->fh_dst); >> + ©->fh_dst, ©->stid); >> if (status) >> goto out; >> >> @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data) >> sizeof(struct knfsd_fh)); >> copy->net = SVC_NET(rqstp); >> if (!copy->cp_synchronous) { >> + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); >> + >> status = nfsd4_init_copy_res(copy, 0); >> async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); >> if (!async_copy) { >> status = nfserrno(-ENOMEM); >> goto out; >> } >> + copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid); >> + if (!copy->cps) { >> + status = nfserrno(-ENOMEM); >> + kfree(async_copy); >> + goto out; >> + } >> + /* take a reference on the parent stateid so it's not >> + * not freed by the copy compound >> + */ >> + atomic_inc(©->stid->sc_count); >> + memcpy(©->cp_res.cb_stateid, ©->cps->cp_stateid, >> + sizeof(copy->cps->cp_stateid)); >> dup_copy_fields(copy, async_copy); >> - memcpy(©->cp_res.cb_stateid, ©->cp_dst_stateid, >> - sizeof(copy->cp_dst_stateid)); >> spin_lock(&async_copy->cp_clp->async_lock); >> list_add(&async_copy->copies, >> &async_copy->cp_clp->async_copies); >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index af40762..f9151f2 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla >> /* Will be incremented before return to client: */ >> atomic_set(&stid->sc_count, 1); >> spin_lock_init(&stid->sc_lock); >> + INIT_LIST_HEAD(&stid->sc_cp_list); >> >> /* >> * It 
shouldn't be a problem to reuse an opaque stateid value. >> @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla >> return NULL; >> } >> >> +/* >> + * Create a unique stateid_t to represent each COPY. Hang the copy >> + * stateids off the OPEN/LOCK/DELEG stateid from the client open >> + * the source file. >> + */ >> +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, >> + struct nfs4_stid *p_stid) >> +{ >> + struct nfs4_cp_state *cps; >> + int new_id; >> + >> + cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL); >> + if (!cps) >> + return NULL; >> + idr_preload(GFP_KERNEL); >> + spin_lock(&nn->s2s_cp_lock); >> + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT); >> + spin_unlock(&nn->s2s_cp_lock); >> + idr_preload_end(); >> + if (new_id < 0) >> + goto out_free; >> + cps->cp_stateid.si_opaque.so_id = new_id; >> + cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; >> + cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; >> + cps->cp_p_stid = p_stid; >> + INIT_LIST_HEAD(&cps->cp_list); >> + list_add(&cps->cp_list, &p_stid->sc_cp_list); >> + >> + return cps; >> +out_free: >> + kfree(cps); >> + return NULL; >> +} >> + >> +void nfs4_free_cp_state(struct nfs4_cp_state *cps) >> +{ >> + struct nfsd_net *nn; >> + >> + nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id); >> + spin_lock(&nn->s2s_cp_lock); >> + idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id); >> + spin_unlock(&nn->s2s_cp_lock); >> + >> + kfree(cps); >> +} >> + >> +static void nfs4_free_cp_statelist(struct nfs4_stid *stid) >> +{ >> + struct nfs4_cp_state *cps; >> + >> + might_sleep(); >> + >> + while (!list_empty(&stid->sc_cp_list)) { >> + cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state, >> + cp_list); >> + list_del(&cps->cp_list); >> + nfs4_free_cp_state(cps); >> + } >> +} >> + >> static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) 
>> { >> struct nfs4_stid *stid; >> @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh) >> } >> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); >> spin_unlock(&clp->cl_lock); >> + >> + nfs4_free_cp_statelist(s); > > Is this really safe if there are still ongoing copies? You are right. It is not. > I think the copies take a reference count on the parent stateid, so I > think we should never actually get here: perhaps this could just be a > WARN_ON(!list_empty(&s->s_cp_list)). Though nfsd4_copy puts things on > this list before bumping the sc_count, so there may be a race there. > > What is supposed to happen, by the way, if a close arrives for a stateid > with a copy in progress? Do we cancel the copy before returning from > the close, or do we fail the close? (Same question for unlock and > delegation return, I guess.) Good questions. I think a normal client shouldn't send CLOSE/UNLOCK while there is an on-going copy. I could see how a DELEGRETURN could be coming in the middle of the copy (because the client received a CB_RECALL). For an easy solution my proposal is to do the following. Upon receiving close, unlock, delegreturn, if the copy list is non-empty, then we send an ERR_DELAY back until the copy is done. Another proposal is to kill the copy thread but that seems too drastic (specially in the case of a delegreturn). Right now when the copy thread is killed, it does not send a reply back to the client (as it assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply). I could change that to send a reply back. In case the copy used a delegation stateid, then the next copy would choose a different stateid. This could be done for the delegation stateid and ERR_DELAY for the close/unlock. Is either one acceptable and if so which one sounds better to you? > > --b. 
> -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 02, 2018 at 03:45:28PM -0500, Olga Kornievskaia wrote: > On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote: > > What is supposed to happen, by the way, if a close arrives for a stateid > > with a copy in progress? Do we cancel the copy before returning from > > the close, or do we fail the close? (Same question for unlock and > > delegation return, I guess.) > > Good questions. I think a normal client shouldn't send CLOSE/UNLOCK > while there is an on-going copy. I could see how a DELEGRETURN could > be coming in the middle of the copy (because the client received a > CB_RECALL). > > For an easy solution my proposal is to do the following. Upon > receiving close, unlock, delegreturn, if the copy list is non-empty, > then we send an ERR_DELAY back until the copy is done. > > Another proposal is to kill the copy thread but that seems too drastic > (specially in the case of a delegreturn). Right now when the copy > thread is killed, it does not send a reply back to the client (as it > assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply). > I could change that to send a reply back. In case the copy used a > delegation stateid, then the next copy would choose a different > stateid. This could be done for the delegation stateid and ERR_DELAY > for the close/unlock. > > Is either one acceptable and if so which one sounds better to you? If we're confident this is something that only a misbehaving client would do, then I'm OK with choosing whatever's simplest. I guess we should double-check with the spec to make sure this case hasn't already been covered there. Returning an error on the conflicting operation sounds good to me. Is it really OK to do a COPY with a delegation stateid? That sounds messy if the delegation is revoked partway through the copy. --b. 
-- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote: > On Fri, Feb 02, 2018 at 03:45:28PM -0500, Olga Kornievskaia wrote: >> On Fri, Jan 26, 2018 at 4:59 PM, J. Bruce Fields <bfields@fieldses.org> wrote: >> > What is supposed to happen, by the way, if a close arrives for a stateid >> > with a copy in progress? Do we cancel the copy before returning from >> > the close, or do we fail the close? (Same question for unlock and >> > delegation return, I guess.) >> >> Good questions. I think a normal client shouldn't send CLOSE/UNLOCK >> while there is an on-going copy. I could see how a DELEGRETURN could >> be coming in the middle of the copy (because the client received a >> CB_RECALL). >> >> For an easy solution my proposal is to do the following. Upon >> receiving close, unlock, delegreturn, if the copy list is non-empty, >> then we send an ERR_DELAY back until the copy is done. >> >> Another proposal is to kill the copy thread but that seems too drastic >> (specially in the case of a delegreturn). Right now when the copy >> thread is killed, it does not send a reply back to the client (as it >> assumes the client sent the OFFLOAD_CANCEL and doesn't need a reply). >> I could change that to send a reply back. In case the copy used a >> delegation stateid, then the next copy would choose a different >> stateid. This could be done for the delegation stateid and ERR_DELAY >> for the close/unlock. >> >> Is either one acceptable and if so which one sounds better to you? > > If we're confident this is something that only a misbehaving client > would do, then I'm OK with choosing whatever's simplest. I guess we > should double-check with the spec to make sure this case hasn't already > been covered there. Returning an error on the conflicting operation > sounds good to me. > > Is it really OK to do a COPY with a delegation stateid? That sounds > messy if the delegation is revoked partway through the copy. 
Yes, according to the spec, it is ok to use any of the stateids. RFC 7862 15.2.3 The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE operation and follows the rules for stateids in Sections 8.2.5 and 18.32.3 of [RFC5661].... while for an intra-server copy ca_src_stateid MUST refer to a stateid that is valid for a READ operation and follows the rules for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661]. 8.2.5 is the section that says in the decreasing priority to choose delegation stateid, locking stateid, open stateid. Trying to deal with the delegation stateid seems to be rather complex. Here's what I could do (which I mentioned before already): 1. possibly change the client to never use a delegation stateid. 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty. 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any on-going copies. Change the nfsd copy to send a reply if it was killed with a signal. What I'm not sure if when it's killed I get any partial bytes from the VFS, I don't think so. I think in that case, the only thing that I might reply with is a 0bytes copy. And whatever client that did sent a copy with a delegation stateid would deal with restarting it with a different stateid. Having said all that, I'd like to ask why do you think handling CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what is currently there for the synchronous copy. Right now if the server receives those operations it does nothing to the on-going synchronous copy. As you noted, the async copy takes a reference on the parent stateid so the state won't go away while the async copy is going on. Why not keep async copy same as the synchronous one? -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote: > On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote: > > Is it really OK to do a COPY with a delegation stateid? That sounds > > messy if the delegation is revoked partway through the copy. > > Yes, according to the spec, it is ok to use any of the stateids. > > RFC 7862 15.2.3 > The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE > operation and follows the rules for stateids in Sections 8.2.5 and > 18.32.3 of [RFC5661].... while for an intra-server copy > ca_src_stateid MUST refer > to a stateid that is valid for a READ operation and follows the rules > for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661]. > > 8.2.5 is the section that says in the decreasing priority to choose > delegation stateid, locking stateid, open stateid. > > Trying to deal with the delegation stateid seems to be rather complex. > Here's what I could do (which I mentioned before already): > 1. possibly change the client to never use a delegation stateid. > 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty. > 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any > on-going copies. Change the nfsd copy to send a reply if it was killed > with a signal. What I'm not sure if when it's killed I get any partial > bytes from the VFS, I don't think so. I think in that case, the only > thing that I might reply with is a 0bytes copy. And whatever client > that did sent a copy with a delegation stateid would deal with > restarting it with a different stateid. > > Having said all that, I'd like to ask why do you think handling > CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what > is currently there for the synchronous copy. Right now if the server > receives those operations it does nothing to the on-going synchronous > copy. 
Or for that matter, why should it be different from READ and WRITE--looks like those can also continue executing after a close, too. So, good point. > As you noted, the async copy takes a reference on the parent stateid > so the state won't go away while the async copy is going on. Why not > keep async copy same as the synchronous one? OK, so maybe it's not such a big deal. I'm still confused about the delegation case, though: a copy can take a very long time, so we can't just wait for the copy to end before revoking the delegation. So do we allow the copy to continue indefinitely even after the delegation stateid it was initiated with has long ago been revoked? Maybe it's not a problem. I guess I'm having trouble remembering why copies (or IO in general) use stateids in the first place.... --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote: > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote: >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote: >> > Is it really OK to do a COPY with a delegation stateid? That sounds >> > messy if the delegation is revoked partway through the copy. >> >> Yes, according to the spec, it is ok to use any of the stateids. >> >> RFC 7862 15.2.3 >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE >> operation and follows the rules for stateids in Sections 8.2.5 and >> 18.32.3 of [RFC5661].... while for an intra-server copy >> ca_src_stateid MUST refer >> to a stateid that is valid for a READ operation and follows the rules >> for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661]. >> >> 8.2.5 is the section that says in the decreasing priority to choose >> delegation stateid, locking stateid, open stateid. >> >> Trying to deal with the delegation stateid seems to be rather complex. >> Here's what I could do (which I mentioned before already): >> 1. possibly change the client to never use a delegation stateid. >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty. >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any >> on-going copies. Change the nfsd copy to send a reply if it was killed >> with a signal. What I'm not sure if when it's killed I get any partial >> bytes from the VFS, I don't think so. I think in that case, the only >> thing that I might reply with is a 0bytes copy. And whatever client >> that did sent a copy with a delegation stateid would deal with >> restarting it with a different stateid. >> >> Having said all that, I'd like to ask why do you think handling >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what >> is currently there for the synchronous copy. Right now if the server >> receives those operations it does nothing to the on-going synchronous >> copy. 
> > Or for that matter, why should it be different from READ and > WRITE--looks like those can also continue executing after a close, too. > So, good point. > >> As you noted, the async copy takes a reference on the parent stateid >> so the state won't go away while the async copy is going on. Why not >> keep async copy same as the synchronous one? > > OK, so maybe it's not such a big deal. > > I'm still confused about the delegation case, though: a copy can take a > very long time, so we can't just wait for the copy to end before > revoking the delegation. So do we allow the copy to continue > indefinitely even after the delegation stateid it was initiated with has > long ago been revoked? > > Maybe it's not a problem. I guess I'm having trouble remembering why > copies (or IO in general) uses stateids in the first place.... Sigh. No you are right, we shouldn't let it run long past the revocation. With IO operations, once it starts an operation like CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because IO isn't long, and once it's done the client will notice a change in stateid and will choose a different stateid to do the next IO operation. I guess that's why a synchronous copy is still bordering ok because it's relatively short and the next copy will choose a different stateid. Unless there are any big objections, I'll make the following changes 1. CLOSE/UNLOCK will check for non-empty list and return ERR_DELAY. 2. change the copy code to once received a signal to return the partial copy back. 3. DELEGRETURN will stop/kill any on-going copies, which would trigger a partial copy reply and the next copy should choose a different stateid. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 16, 2018 at 11:06:07AM -0500, Olga Kornievskaia wrote: > On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote: > > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote: > >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote: > >> > Is it really OK to do a COPY with a delegation stateid? That sounds > >> > messy if the delegation is revoked partway through the copy. > >> > >> Yes, according to the spec, it is ok to use any of the stateids. > >> > >> RFC 7862 15.2.3 > >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE > >> operation and follows the rules for stateids in Sections 8.2.5 and > >> 18.32.3 of [RFC5661].... while for an intra-server copy > >> ca_src_stateid MUST refer > >> to a stateid that is valid for a READ operation and follows the rules > >> for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661]. > >> > >> 8.2.5 is the section that says in the decreasing priority to choose > >> delegation stateid, locking stateid, open stateid. > >> > >> Trying to deal with the delegation stateid seems to be rather complex. > >> Here's what I could do (which I mentioned before already): > >> 1. possibly change the client to never use a delegation stateid. > >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty. > >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any > >> on-going copies. Change the nfsd copy to send a reply if it was killed > >> with a signal. What I'm not sure if when it's killed I get any partial > >> bytes from the VFS, I don't think so. I think in that case, the only > >> thing that I might reply with is a 0bytes copy. And whatever client > >> that did sent a copy with a delegation stateid would deal with > >> restarting it with a different stateid. 
> >> > >> Having said all that, I'd like to ask why do you think handling > >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what > >> is currently there for the synchronous copy. Right now if the server > >> receives those operations it does nothing to the on-going synchronous > >> copy. > > > > Or for that matter, why should it be different from READ and > > WRITE--looks like those can also continue executing after a close, too. > > So, good point. > > > >> As you noted, the async copy takes a reference on the parent stateid > >> so the state won't go away while the async copy is going on. Why not > >> keep async copy same as the synchronous one? > > > > OK, so maybe it's not such a big deal. > > > > I'm still confused about the delegation case, though: a copy can take a > > very long time, so we can't just wait for the copy to end before > > revoking the delegation. So do we allow the copy to continue > > indefinitely even after the delegation stateid it was initiated with has > > long ago been revoked? > > > > Maybe it's not a problem. I guess I'm having trouble remembering why > > copies (or IO in general) uses stateids in the first place.... > > Sigh. No you are right, we shouldn't let it run long past the > revocation. I'm worried by it running long past the revocation, but I'm also not managing to come with a case where it causes an actual problem. --b. > With IO operations, once it starts an operation like > CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because > IO isn't long, and once it's done the client will notice a change in > stateid and will choose a different stateid to do the next IO > operation. I guess that's why a synchronous copy is still bordering ok > because it's relatively short and the next copy will choose a > different stateid. > > Unless there are any big objections, I'll make the following changes > 1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY. > 2. 
change the copy code to once received a signal to return the > partial copy back. > 3. DELEGRETURN will stop/kill any on-going copies, which would trigger > a partial copy reply and the next copy should choose a different > stateid. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 16, 2018 at 1:12 PM, J. Bruce Fields <bfields@redhat.com> wrote: > On Fri, Feb 16, 2018 at 11:06:07AM -0500, Olga Kornievskaia wrote: >> On Thu, Feb 15, 2018 at 8:43 PM, J. Bruce Fields <bfields@redhat.com> wrote: >> > On Thu, Feb 15, 2018 at 05:18:55PM -0500, Olga Kornievskaia wrote: >> >> On Fri, Feb 2, 2018 at 4:45 PM, J. Bruce Fields <bfields@redhat.com> wrote: >> >> > Is it really OK to do a COPY with a delegation stateid? That sounds >> >> > messy if the delegation is revoked partway through the copy. >> >> >> >> Yes, according to the spec, it is ok to use any of the stateids. >> >> >> >> RFC 7862 15.2.3 >> >> The ca_dst_stateid MUST refer to a stateid that is valid for a WRITE >> >> operation and follows the rules for stateids in Sections 8.2.5 and >> >> 18.32.3 of [RFC5661].... while for an intra-server copy >> >> ca_src_stateid MUST refer >> >> to a stateid that is valid for a READ operation and follows the rules >> >> for stateids in Sections 8.2.5 and 18.22.3 of [RFC5661]. >> >> >> >> 8.2.5 is the section that says in the decreasing priority to choose >> >> delegation stateid, locking stateid, open stateid. >> >> >> >> Trying to deal with the delegation stateid seems to be rather complex. >> >> Here's what I could do (which I mentioned before already): >> >> 1. possibly change the client to never use a delegation stateid. >> >> 2. for CLOSE/UNLOCK send back ERR_DELAY if copy list is not empty. >> >> 3. for DELEGRETURN (or when the server initiates CB_RECALL?) kill any >> >> on-going copies. Change the nfsd copy to send a reply if it was killed >> >> with a signal. What I'm not sure if when it's killed I get any partial >> >> bytes from the VFS, I don't think so. I think in that case, the only >> >> thing that I might reply with is a 0bytes copy. And whatever client >> >> that did sent a copy with a delegation stateid would deal with >> >> restarting it with a different stateid. 
>> >> >> >> Having said all that, I'd like to ask why do you think handling >> >> CLOSE/LOCKU/DELEGRETURN should be any different for async copy vs what >> >> is currently there for the synchronous copy. Right now if the server >> >> receives those operations it does nothing to the on-going synchronous >> >> copy. >> > >> > Or for that matter, why should it be different from READ and >> > WRITE--looks like those can also continue executing after a close, too. >> > So, good point. >> > >> >> As you noted, the async copy takes a reference on the parent stateid >> >> so the state won't go away while the async copy is going on. Why not >> >> keep async copy same as the synchronous one? >> > >> > OK, so maybe it's not such a big deal. >> > >> > I'm still confused about the delegation case, though: a copy can take a >> > very long time, so we can't just wait for the copy to end before >> > revoking the delegation. So do we allow the copy to continue >> > indefinitely even after the delegation stateid it was initiated with has >> > long ago been revoked? >> > >> > Maybe it's not a problem. I guess I'm having trouble remembering why >> > copies (or IO in general) uses stateids in the first place.... >> >> Sigh. No you are right, we shouldn't let it run long past the >> revocation. > > I'm worried by it running long past the revocation, but I'm also not > managing to come with a case where it causes an actual problem. It seems wrong but I'm not sure exactly why. I thinking what if the server receives all state closing operations (delegreturn, unlock, close) and the copy is still going. A file is being changed after the file is closed seem weird. But as I mentioned, properly working client should be unlocking/closing the file until after the copy is done (or until offload_cancel is sent after ctrl-c). Who is responsible for making sure that the file isn't being accessed after it's closed? Is it server's or client's? 
Like client is making sure all the writes are done before it does the close. Should it be client's responsibility as well to make sure there are no on-going copies before doing the close? Is the question here about just delegations or are you questioning whether or not CLOSE/UNLOCK need to be delayed until the copy is finished? > --b. > >> With IO operations, once it starts an operation like >> CLOSE/UNLOCK/DELEGRETURN can arrive at the server and it's ok because >> IO isn't long, and once it's done the client will notice a change in >> stateid and will choose a different stateid to do the next IO >> operation. I guess that's why a synchronous copy is still bordering ok >> because it's relatively short and the next copy will choose a >> different stateid. >> >> Unless there are any big objections, I'll make the following changes >> 1. CLOSE/UNLOCK will check for none-empty list and return ERR_DELAY. >> 2. change the copy code to once received a signal to return the >> partial copy back. >> 3. DELEGRETURN will stop/kill any on-going copies, which would trigger >> a partial copy reply and the next copy should choose a different >> stateid. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 16, 2018 at 03:53:05PM -0500, Olga Kornievskaia wrote: > It seems wrong but I'm not sure exactly why. I thinking what if the > server receives all state closing operations (delegreturn, unlock, > close) and the copy is still going. A file is being changed after the > file is closed seem weird. But as I mentioned, properly working client > should be unlocking/closing the file until after the copy is done (or > until offload_cancel is sent after ctrl-c). > > Who is responsible for making sure that the file isn't being accessed > after it's closed? Is it server's or client's? Like client is making > sure all the writes are done before it does the close. Should it be > client's responsibility as well to make sure there are no on-going > copies before doing the close? > > Is the question here about just delegations or are you questioning > whether or not CLOSE/UNLOCK need to be delayed until the copy is > finished? Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too. I don't know if the client really causes trouble for anyone but itself if it allows IO to go on past close, unlock, or delegreturn. If it's still going on after another client acquires a conflicting open, lock, or delegation, that could be a problem: a client that holds a mandatory lock, or an open with DENY_WRITE, or a delegation, has a right not to expect the file to change underneath it. We should already be safe against that in the delegation case thanks to the vfs checks in fs/locks.c:check_conflicting_open(). In the DENY_WRITE case I bet there's still bug. Maybe in the mandatory lock case, too. But those cases are rare and already have other problems. So, I guess I don't care too much unless someone's seeing another problem. Still, killing off a long-running copy is probably a good precaution? --b. 
-- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Feb 20, 2018 at 1:48 PM, J. Bruce Fields <bfields@fieldses.org> wrote: > On Fri, Feb 16, 2018 at 03:53:05PM -0500, Olga Kornievskaia wrote: >> It seems wrong but I'm not sure exactly why. I thinking what if the >> server receives all state closing operations (delegreturn, unlock, >> close) and the copy is still going. A file is being changed after the >> file is closed seem weird. But as I mentioned, properly working client >> should be unlocking/closing the file until after the copy is done (or >> until offload_cancel is sent after ctrl-c). >> >> Who is responsible for making sure that the file isn't being accessed >> after it's closed? Is it server's or client's? Like client is making >> sure all the writes are done before it does the close. Should it be >> client's responsibility as well to make sure there are no on-going >> copies before doing the close? >> >> Is the question here about just delegations or are you questioning >> whether or not CLOSE/UNLOCK need to be delayed until the copy is >> finished? > > Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too. > > I don't know if the client really causes trouble for anyone but itself > if it allows IO to go on past close, unlock, or delegreturn. > > If it's still going on after another client acquires a conflicting open, > lock, or delegation, that could be a problem: a client that holds a > mandatory lock, or an open with DENY_WRITE, or a delegation, has a right > not to expect the file to change underneath it. We should already be > safe against that in the delegation case thanks to the vfs checks in > fs/locks.c:check_conflicting_open(). > > In the DENY_WRITE case I bet there's still bug. Maybe in the mandatory > lock case, too. But those cases are rare and already have other > problems. > > So, I guess I don't care too much unless someone's seeing another > problem. > > Still, killing off a long-running copy is probably a good precaution? 
Now that I got back into the code and remembered it again, copy stateid is never associated with a delegation stateid. Copy is associated with the destination file's stateid which is opened for write. Since linux server does not give out write delegations, then the stateid is either an open stateid or lock stateid (in my testing I varied not locking and locking the destination file so to get open stateid as well as lock stateid to be used in the copy). If you want me to keep the code for killing off copy in the delegreturn because in the future there might be write delegations, I can but I recall the rule of thumb is to keep it simple, thus I'm inclined to remove it. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Mar 06, 2018 at 12:15:59PM -0500, Olga Kornievskaia wrote: > On Tue, Feb 20, 2018 at 1:48 PM, J. Bruce Fields <bfields@fieldses.org> wrote: > > Yeah, I kinda suspect there's a bug in the CLOSE and UNLOCK case too. > > > > I don't know if the client really causes trouble for anyone but itself > > if it allows IO to go on past close, unlock, or delegreturn. > > > > If it's still going on after another client acquires a conflicting open, > > lock, or delegation, that could be a problem: a client that holds a > > mandatory lock, or an open with DENY_WRITE, or a delegation, has a right > > not to expect the file to change underneath it. We should already be > > safe against that in the delegation case thanks to the vfs checks in > > fs/locks.c:check_conflicting_open(). > > > > In the DENY_WRITE case I bet there's still bug. Maybe in the mandatory > > lock case, too. But those cases are rare and already have other > > problems. > > > > So, I guess I don't care too much unless someone's seeing another > > problem. > > > > Still, killing off a long-running copy is probably a good precaution? > > Now that I got back into the code and remembered it again, copy > stateid is never associated with a delegation stateid. Copy is > associated with the destination file's stateid which is opened for > write. Since linux server does not give out write delegations, then > the stateid is either an open stateid or lock stateid (in my testing I > varied not locking and locking the destination file so to get open > stateid as well as lock stateid to be used in the copy). If you want > me to keep the code for killing off copy in the delegreturn because in > the future there might be write delegations, I can but I recall the > rule of thumb is to keep it simple, thus I'm inclined to remove it. I'm hoping to get to write delegations soon.... And you can actually write while holding a read delegation. (Currently knfsd breaks the delegation in that case, but I'm fixing that.) 
That said, I still haven't convinced myself there's any real issue here. So, I'm fine with leaving that out. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 3714231..2c88a95 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -119,6 +119,14 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; + + /* + * clientid and stateid data for construction of net unique COPY + * stateids. + */ + u32 s2s_cp_cl_id; + struct idr s2s_cp_stateids; + spinlock_t s2s_cp_lock; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a020533..6876080 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1039,7 +1039,8 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) static __be32 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *src_stateid, struct file **src, - stateid_t *dst_stateid, struct file **dst) + stateid_t *dst_stateid, struct file **dst, + struct nfs4_stid **stid) { __be32 status; @@ -1053,7 +1054,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL, - NULL); + stid); if (status) { dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); goto out_put_src; @@ -1084,7 +1085,7 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) __be32 status; status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src, - &clone->cl_dst_stateid, &dst); + &clone->cl_dst_stateid, &dst, NULL); if (status) goto out; @@ -1117,8 +1118,6 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, static int nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) { - memcpy(©->cp_res.cb_stateid, ©->cp_dst_stateid, - sizeof(copy->cp_dst_stateid)); copy->cp_res.wr_stable_how = NFS_UNSTABLE; copy->cp_consecutive = 1; copy->cp_synchronous = sync; @@ -1180,10 +1179,15 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) 
atomic_inc(&dst->cp_clp->cl_refcount); dst->fh_dst = get_file(src->fh_dst); dst->fh_src = get_file(src->fh_src); + dst->stid = src->stid; + dst->cps = src->cps; } static void cleanup_async_copy(struct nfsd4_copy *copy) { + list_del(©->cps->cp_list); + nfs4_free_cp_state(copy->cps); + nfs4_put_stid(copy->stid); fput(copy->fh_dst); fput(copy->fh_src); spin_lock(©->cp_clp->async_lock); @@ -1225,7 +1229,7 @@ static int nfsd4_do_async_copy(void *data) status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, ©->fh_src, ©->cp_dst_stateid, - ©->fh_dst); + ©->fh_dst, ©->stid); if (status) goto out; @@ -1234,15 +1238,27 @@ static int nfsd4_do_async_copy(void *data) sizeof(struct knfsd_fh)); copy->net = SVC_NET(rqstp); if (!copy->cp_synchronous) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + status = nfsd4_init_copy_res(copy, 0); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) { status = nfserrno(-ENOMEM); goto out; } + copy->cps = nfs4_alloc_init_cp_state(nn, copy->stid); + if (!copy->cps) { + status = nfserrno(-ENOMEM); + kfree(async_copy); + goto out; + } + /* take a reference on the parent stateid so it's not + * not freed by the copy compound + */ + atomic_inc(©->stid->sc_count); + memcpy(©->cp_res.cb_stateid, ©->cps->cp_stateid, + sizeof(copy->cps->cp_stateid)); dup_copy_fields(copy, async_copy); - memcpy(©->cp_res.cb_stateid, ©->cp_dst_stateid, - sizeof(copy->cp_dst_stateid)); spin_lock(&async_copy->cp_clp->async_lock); list_add(&async_copy->copies, &async_copy->cp_clp->async_copies); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index af40762..f9151f2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -658,6 +658,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla /* Will be incremented before return to client: */ atomic_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); + INIT_LIST_HEAD(&stid->sc_cp_list); /* * It shouldn't be a problem to reuse an 
opaque stateid value. @@ -674,6 +675,66 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla return NULL; } +/* + * Create a unique stateid_t to represent each COPY. Hang the copy + * stateids off the OPEN/LOCK/DELEG stateid from the client open + * the source file. + */ +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid) +{ + struct nfs4_cp_state *cps; + int new_id; + + cps = kzalloc(sizeof(struct nfs4_cp_state), GFP_KERNEL); + if (!cps) + return NULL; + idr_preload(GFP_KERNEL); + spin_lock(&nn->s2s_cp_lock); + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, cps, 0, 0, GFP_NOWAIT); + spin_unlock(&nn->s2s_cp_lock); + idr_preload_end(); + if (new_id < 0) + goto out_free; + cps->cp_stateid.si_opaque.so_id = new_id; + cps->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; + cps->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + cps->cp_p_stid = p_stid; + INIT_LIST_HEAD(&cps->cp_list); + list_add(&cps->cp_list, &p_stid->sc_cp_list); + + return cps; +out_free: + kfree(cps); + return NULL; +} + +void nfs4_free_cp_state(struct nfs4_cp_state *cps) +{ + struct nfsd_net *nn; + + nn = net_generic(cps->cp_p_stid->sc_client->net, nfsd_net_id); + spin_lock(&nn->s2s_cp_lock); + idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.si_opaque.so_id); + spin_unlock(&nn->s2s_cp_lock); + + kfree(cps); +} + +static void nfs4_free_cp_statelist(struct nfs4_stid *stid) +{ + struct nfs4_cp_state *cps; + + might_sleep(); + + while (!list_empty(&stid->sc_cp_list)) { + cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cp_state, + cp_list); + list_del(&cps->cp_list); + nfs4_free_cp_state(cps); + } +} + static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; @@ -819,6 +880,9 @@ static void block_delegations(struct knfsd_fh *fh) } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); spin_unlock(&clp->cl_lock); + + nfs4_free_cp_statelist(s); + 
s->sc_free(s); if (fp) put_nfs4_file(fp); @@ -6954,6 +7018,8 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); spin_lock_init(&nn->client_lock); + spin_lock_init(&nn->s2s_cp_lock); + idr_init(&nn->s2s_cp_stateids); spin_lock_init(&nn->blocked_locks_lock); INIT_LIST_HEAD(&nn->blocked_locks_lru); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6493df6..232270d 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1241,6 +1241,7 @@ static __net_init int nfsd_init_net(struct net *net) nn->nfsd4_grace = 90; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); + nn->s2s_cp_cl_id = nn->clientid_counter++; return 0; out_idmap_error: diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d58507b..b4de8ae 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -93,6 +93,7 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 + struct list_head sc_cp_list; unsigned char sc_type; stateid_t sc_stateid; spinlock_t sc_lock; @@ -102,6 +103,17 @@ struct nfs4_stid { }; /* + * Keep a list of stateids issued by the COPY, associate it with the + * parent OPEN/LOCK/DELEG stateid. Used for lookup by + * OFFLOAD_CANCEL and OFFLOAD_STATUS (as well as COPY_NOTIFY) + */ +struct nfs4_cp_state { + stateid_t cp_stateid; + struct list_head cp_list; /* per parent nfs4_stid */ + struct nfs4_stid *cp_p_stid; /* pointer to parent */ +}; + +/* * Represents a delegation stateid. 
The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is * returned by the client: @@ -609,6 +621,8 @@ __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)); +struct nfs4_cp_state *nfs4_alloc_init_cp_state(struct nfsd_net *nn, struct nfs4_stid *p_stid); +void nfs4_free_cp_state(struct nfs4_cp_state *cps); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0a19954..27bac6a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -535,6 +535,8 @@ struct nfsd4_copy { struct file *fh_src; struct file *fh_dst; struct net *net; + struct nfs4_stid *stid; + struct nfs4_cp_state *cps; struct list_head copies; struct task_struct *copy_task;
Generate a new stateid to be used for reply to the asynchronous COPY (this would also be used later by COPY_NOTIFY as well). Associate the stateid with the parent OPEN/LOCK/DELEG stateid that can be freed during the free of the parent stateid. However, right now deciding to bind the lifetime to when the vfs copy is done. This way we don't need to keep the nfsd_net structure for the callback. The drawback is that the time copy state information is available for query by OFFLOAD_STATUS is slightly less. Signed-off-by: Olga Kornievskaia <kolga@netapp.com> --- fs/nfsd/netns.h | 8 +++++++ fs/nfsd/nfs4proc.c | 32 +++++++++++++++++++------- fs/nfsd/nfs4state.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsctl.c | 1 + fs/nfsd/state.h | 14 ++++++++++++ fs/nfsd/xdr4.h | 2 ++ 6 files changed, 115 insertions(+), 8 deletions(-)