Message ID | 20181019152905.32418-14-olga.kornievskaia@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | server-side support for "inter" SSC copy | expand |
On Fri, Oct 19, 2018 at 11:29:05AM -0400, Olga Kornievskaia wrote: > From: Olga Kornievskaia <kolga@netapp.com> > > Given a universal address, mount the source server from the destination > server. Use an internal mount. Call the NFS client nfs42_ssc_open to > obtain the NFS struct file suitable for nfsd_copy_range. > > Ability to do "inter" server-to-server depends on the an nfsd kernel > parameter "inter_copy_offload_enabled". > > Signed-off-by: Andy Adamson <andros@netapp.com> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com> > --- > fs/nfsd/nfs4proc.c | 298 ++++++++++++++++++++++++++++++++++++++++++++++++--- > fs/nfsd/nfssvc.c | 6 ++ > fs/nfsd/xdr4.h | 5 + > include/linux/nfs4.h | 1 + > 4 files changed, 293 insertions(+), 17 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 59e9d0c..6dcd80c 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1153,6 +1153,229 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) > while ((copy = nfsd4_get_copy(clp)) != NULL) > nfsd4_stop_copy(copy); > } > +#ifdef CONFIG_NFSD_V4_2_INTER_SSC > + > +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, > + struct nfs_fh *src_fh, > + nfs4_stateid *stateid); > +extern void nfs42_ssc_close(struct file *filep); > + > +extern void nfs_sb_deactive(struct super_block *sb); > + > +#define NFSD42_INTERSSC_MOUNTOPS "minorversion=2,vers=4,addr=%s,clientaddr=%s" The nfs man page says "clientaddr=" has no effect on 4.2 mounts. Also, what's the "addr=" option for, isn't the server address already given in the mount string? (Honest question, I may be wrong here.) > + > +/** > + * Support one copy source server for now. 
> + */ > +static struct vfsmount * > +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp) > +{ > + struct file_system_type *type; > + struct vfsmount *ss_mnt; > + struct nfs42_netaddr *naddr; > + struct sockaddr_storage tmp_addr; > + size_t tmp_addrlen, match_netid_len = 3; > + char *startsep = "", *endsep = "", *match_netid = "tcp"; > + char *ipaddr, *ipaddr2, *raw_data; > + int len, raw_len, status = -EINVAL; > + > + /* Currently support only NL4_NETADDR source server */ > + if (nss->nl4_type != NL4_NETADDR) { > + WARN(nss->nl4_type != NL4_NETADDR, > + "nfsd4_copy src server not NL4_NETADDR\n"); Won't nfsd4_decode_nl4_server actually let through NL4_NAME and NL4_URL? That would make this WARN() triggerable by a client--that's bad. > + goto out_err; > + } > + > + naddr = &nss->u.nl4_addr; > + > + tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, > + naddr->addr_len, > + (struct sockaddr *)&tmp_addr, > + sizeof(tmp_addr)); > + if (tmp_addrlen == 0) > + goto out_err; > + > + if (tmp_addr.ss_family == AF_INET6) { > + startsep = "["; > + endsep = "]"; > + match_netid = "tcp6"; > + match_netid_len = 4; > + } > + > + if (naddr->netid_len != match_netid_len || > + strncmp(naddr->netid, match_netid, naddr->netid_len)) Just strcmp(naddr->netid, match_netid) would do the job. > + goto out_err; > + > + /* Construct the raw data for the vfs_kern_mount call */ > + len = RPC_MAX_ADDRBUFLEN + 1; > + ipaddr = kzalloc(len, GFP_KERNEL); > + if (!ipaddr) > + goto out_err; > + > + rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len); > + > + /* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/ > + ipaddr2 = kzalloc(len + 5, GFP_KERNEL); > + if (!ipaddr2) > + goto out_free_ipaddr; > + > + rpc_ntop((struct sockaddr *)&rqstp->rq_daddr, ipaddr2, len + 5); Replace the above by two calls to a function that does kmalloc+rpcntop? (Though actually I don't think we need ipaddr.) 
> + > + raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr) + > + strlen(ipaddr2); > + raw_data = kzalloc(raw_len, GFP_KERNEL); > + if (!raw_data) > + goto out_free_ipaddr2; > + > + snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr, > + ipaddr2); > + > + status = -ENODEV; > + type = get_fs_type("nfs"); > + if (!type) > + goto out_free_rawdata; I believe you also need a put_filesystem after this. (e.g. see kernel/trace/trace.c:trace_automount().) > + > + /* Set the server:<export> for the vfs_kerne_mount call */ > + memset(ipaddr2, 0, len + 5); > + snprintf(ipaddr2, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); > + > + dprintk("%s Raw mount data: %s server:export %s\n", __func__, > + raw_data, ipaddr2); > + > + /* Use an 'internal' mount: MS_KERNMOUNT -> MNT_INTERNAL */ > + ss_mnt = vfs_kern_mount(type, MS_KERNMOUNT, ipaddr2, raw_data); > + if (IS_ERR(ss_mnt)) { > + status = PTR_ERR(ss_mnt); > + goto out_free_rawdata; > + } > + Let's combine the successful and failure cases, so the below should be something like: out_free_rawdata: kfree(raw_data); out_free_ipaddr2: kfree(ipaddr2); out_free_ipaddr: kfree(ipaddr); out_err: if (IS_ERR(ret)) dprintk("--> %s ERROR %d\n", __func__, status); return ret; > + kfree(raw_data); > + kfree(ipaddr2); > + kfree(ipaddr); > + > + return ss_mnt; > + > +out_free_rawdata: > + kfree(raw_data); > +out_free_ipaddr2: > + kfree(ipaddr2); > +out_free_ipaddr: > + kfree(ipaddr); > +out_err: > + dprintk("--> %s ERROR %d\n", __func__, status); > + return ERR_PTR(status); > +} > + > +static void > +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) > +{ > + nfs_sb_deactive(ss_mnt->mnt_sb); > + mntput(ss_mnt); > +} > + > +/** > + * nfsd4_setup_inter_ssc > + * > + * Verify COPY destination stateid. > + * Connect to the source server with NFSv4.1. > + * Create the source struct file for nfsd_copy_range. 
> + * Called with COPY cstate: > + * SAVED_FH: source filehandle > + * CURRENT_FH: destination filehandle > + * > + * Returns errno (not nfserrxxx) > + */ > +static struct vfsmount * > +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, > + struct nfsd4_compound_state *cstate, > + struct nfsd4_copy *copy) > +{ > + struct svc_fh *s_fh = NULL; > + stateid_t *s_stid = &copy->cp_src_stateid; > + struct vfsmount *ss_mnt; > + __be32 status; > + > + /* Verify the destination stateid and set dst struct file*/ > + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > + &copy->cp_dst_stateid, > + WR_STATE, &copy->file_dst, NULL, > + NULL); > + if (status) { > + ss_mnt = ERR_PTR(be32_to_cpu(status)); That looks wrong. I don't think IS_ERR() is going to be true for that value. If we need to return either an nfserr or a pointer, best is probably to have the function return __be32 and have the pointer returned in an argument. (Though I notice the only caller ignores the error value, I wonder if that's right.) > + goto out; > + } > + > + ss_mnt = nfsd4_interssc_connect(copy->cp_src, rqstp); > + if (IS_ERR(ss_mnt)) > + goto out; So this function can return -ERRNO, or nfserr_*, or a pointer? That won't work. 
> + s_fh = &cstate->save_fh; > + > + copy->c_fh.size = s_fh->fh_handle.fh_size; > + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); > + copy->stateid.seqid = s_stid->si_generation; > + memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, > + sizeof(stateid_opaque_t)); > + > +out: > + return ss_mnt; > +} > + > +static void > +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *src, > + struct file *dst) > +{ > + nfs42_ssc_close(src); > + fput(src); > + fput(dst); > + mntput(ss_mnt); > +} > + > +#else /* CONFIG_NFSD_V4_2_INTER_SSC */ > + > +static struct vfsmount * > +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, > + struct nfsd4_compound_state *cstate, > + struct nfsd4_copy *copy) > +{ > + return ERR_PTR(-EINVAL); I wonder if that's really the right error for the server-to-server-copy-unsupported case. > +} > + > +static void > +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *src, > + struct file *dst) > +{ > +} > + > +static void > +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) > +{ > +} > + > +static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, > + struct nfs_fh *src_fh, > + nfs4_stateid *stateid) > +{ > + return NULL; > +} > +#endif /* CONFIG_NFSD_V4_2_INTER_SSC */ > + > +static __be32 > +nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, > + struct nfsd4_compound_state *cstate, > + struct nfsd4_copy *copy) > +{ > + return nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, > + &copy->file_src, &copy->cp_dst_stateid, > + &copy->file_dst, NULL); > +} > + > +static void > +nfsd4_cleanup_intra_ssc(struct file *src, struct file *dst) > +{ > + fput(src); > + fput(dst); > +} > > static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) > { > @@ -1217,12 +1440,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) > status = nfs_ok; > } > > - fput(copy->file_src); > - fput(copy->file_dst); > + if (copy->cp_src) /* Inter server SSC */ > + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->file_src, > + 
copy->file_dst); > + else > + nfsd4_cleanup_intra_ssc(copy->file_src, copy->file_dst); > + > return status; > } > > -static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > +static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > { > dst->cp_src_pos = src->cp_src_pos; > dst->cp_dst_pos = src->cp_dst_pos; > @@ -1232,8 +1459,21 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > memcpy(&dst->fh, &src->fh, sizeof(src->fh)); > dst->cp_clp = src->cp_clp; > dst->file_dst = get_file(src->file_dst); > - dst->file_src = get_file(src->file_src); > + if (!src->cp_src) /* for inter, file_src doesnt exist yet */ > + dst->file_src = get_file(src->file_src); > memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); > + if (src->cp_src) { > + dst->cp_src = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); > + if (!dst->cp_src) > + return -ENOMEM; > + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); > + } > + memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); > + memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); > + dst->ss_mnt = src->ss_mnt; > + > + return 0; > + > } > > static void cleanup_async_copy(struct nfsd4_copy *copy) > @@ -1244,6 +1484,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) > spin_lock(&copy->cp_clp->async_lock); > list_del(&copy->copies); > spin_unlock(&copy->cp_clp->async_lock); > + kfree(copy->cp_src); > nfs4_put_copy(copy); > } > > @@ -1252,7 +1493,18 @@ static int nfsd4_do_async_copy(void *data) > struct nfsd4_copy *copy = (struct nfsd4_copy *)data; > struct nfsd4_copy *cb_copy; > > + if (copy->cp_src) { /* Inter server SSC */ > + copy->file_src = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh, > + &copy->stateid); > + if (IS_ERR(copy->file_src)) { > + copy->nfserr = nfserr_offload_denied; > + nfsd4_interssc_disconnect(copy->ss_mnt); > + goto do_callback; > + } > + } > + > copy->nfserr = nfsd4_do_copy(copy, 0); > +do_callback: > cb_copy = kzalloc(sizeof(struct 
nfsd4_copy), GFP_KERNEL); > if (!cb_copy) > goto out; > @@ -1276,11 +1528,19 @@ static int nfsd4_do_async_copy(void *data) > __be32 status; > struct nfsd4_copy *async_copy = NULL; > > - status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, > - &copy->file_src, &copy->cp_dst_stateid, > - &copy->file_dst, NULL); > - if (status) > - goto out; > + if (copy->cp_src) { /* Inter server SSC */ > + if (!inter_copy_offload_enable || copy->cp_synchronous) { > + status = nfserr_notsupp; > + goto out; > + } > + copy->ss_mnt = nfsd4_setup_inter_ssc(rqstp, cstate, copy); > + if (IS_ERR(copy->ss_mnt)) > + return nfserr_offload_denied; We should check that this is the right error to return in all those failure cases. That's all I have for now. --b. > + } else { > + status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); > + if (status) > + return status; > + } > > copy->cp_clp = cstate->clp; > memcpy(&copy->fh, &cstate->current_fh.fh_handle, > @@ -1291,15 +1551,15 @@ static int nfsd4_do_async_copy(void *data) > status = nfserrno(-ENOMEM); > async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); > if (!async_copy) > - goto out; > - if (!nfs4_init_cp_state(nn, copy)) { > - kfree(async_copy); > - goto out; > - } > + goto out_err; > + if (!nfs4_init_cp_state(nn, copy)) > + goto out_err; > refcount_set(&async_copy->refcount, 1); > memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid, > sizeof(copy->cp_stateid)); > - dup_copy_fields(copy, async_copy); > + status = dup_copy_fields(copy, async_copy); > + if (status) > + goto out_err; > async_copy->copy_task = kthread_create(nfsd4_do_async_copy, > async_copy, "%s", "copy thread"); > if (IS_ERR(async_copy->copy_task)) > @@ -1310,13 +1570,17 @@ static int nfsd4_do_async_copy(void *data) > spin_unlock(&async_copy->cp_clp->async_lock); > wake_up_process(async_copy->copy_task); > status = nfs_ok; > - } else > + } else { > status = nfsd4_do_copy(copy, 1); > + } > out: > return status; > out_err: > cleanup_async_copy(async_copy); > - goto out; > + status = 
nfserrno(-ENOMEM); > + if (copy->cp_src) > + nfsd4_interssc_disconnect(copy->ss_mnt); > + goto out_err; > } > > struct nfsd4_copy * > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index 89cb484..9d254e7 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -30,6 +30,12 @@ > > #define NFSDDBG_FACILITY NFSDDBG_SVC > > +bool inter_copy_offload_enable; > +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); > +module_param(inter_copy_offload_enable, bool, 0644); > +MODULE_PARM_DESC(inter_copy_offload_enable, > + "Enable inter server to server copy offload. Default: false"); > + > extern struct svc_program nfsd_program; > static int nfsd(void *vrqstp); > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index c98ef64..c7e3df1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -546,7 +546,12 @@ struct nfsd4_copy { > struct task_struct *copy_task; > refcount_t refcount; > bool stopped; > + > + struct vfsmount *ss_mnt; > + struct nfs_fh c_fh; > + nfs4_stateid stateid; > }; > +extern bool inter_copy_offload_enable; > > struct nfsd4_seek { > /* request */ > diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h > index 4d76f87..e53a261 100644 > --- a/include/linux/nfs4.h > +++ b/include/linux/nfs4.h > @@ -17,6 +17,7 @@ > #include <linux/uidgid.h> > #include <uapi/linux/nfs4.h> > #include <linux/sunrpc/msg_prot.h> > +#include <linux/nfs.h> > > enum nfs4_acl_whotype { > NFS4_ACL_WHO_NAMED = 0, > -- > 1.8.3.1
On Wed, Nov 7, 2018 at 4:49 PM J. Bruce Fields <bfields@fieldses.org> wrote: > > On Fri, Oct 19, 2018 at 11:29:05AM -0400, Olga Kornievskaia wrote: > > From: Olga Kornievskaia <kolga@netapp.com> > > > > Given a universal address, mount the source server from the destination > > server. Use an internal mount. Call the NFS client nfs42_ssc_open to > > obtain the NFS struct file suitable for nfsd_copy_range. > > > > Ability to do "inter" server-to-server depends on the an nfsd kernel > > parameter "inter_copy_offload_enabled". > > > > Signed-off-by: Andy Adamson <andros@netapp.com> > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com> > > --- > > fs/nfsd/nfs4proc.c | 298 ++++++++++++++++++++++++++++++++++++++++++++++++--- > > fs/nfsd/nfssvc.c | 6 ++ > > fs/nfsd/xdr4.h | 5 + > > include/linux/nfs4.h | 1 + > > 4 files changed, 293 insertions(+), 17 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index 59e9d0c..6dcd80c 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -1153,6 +1153,229 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) > > while ((copy = nfsd4_get_copy(clp)) != NULL) > > nfsd4_stop_copy(copy); > > } > > +#ifdef CONFIG_NFSD_V4_2_INTER_SSC > > + > > +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, > > + struct nfs_fh *src_fh, > > + nfs4_stateid *stateid); > > +extern void nfs42_ssc_close(struct file *filep); > > + > > +extern void nfs_sb_deactive(struct super_block *sb); > > + > > +#define NFSD42_INTERSSC_MOUNTOPS "minorversion=2,vers=4,addr=%s,clientaddr=%s" > > The nfs man page says "clientaddr=" has no effect on 4.2 mounts. I only have nfs man page from RHEL7.5 and I don't see that. > Also, what's the "addr=" option for, isn't the server address already > given in the mount string? (Honest question, I may be wrong here.) I believe going thru the kernel vfs_kern_mount() we need to specify "addr=" otherwise it doesn't know which server to mount. 
> > > + > > +/** > > + * Support one copy source server for now. > > + */ > > +static struct vfsmount * > > +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp) > > +{ > > + struct file_system_type *type; > > + struct vfsmount *ss_mnt; > > + struct nfs42_netaddr *naddr; > > + struct sockaddr_storage tmp_addr; > > + size_t tmp_addrlen, match_netid_len = 3; > > + char *startsep = "", *endsep = "", *match_netid = "tcp"; > > + char *ipaddr, *ipaddr2, *raw_data; > > + int len, raw_len, status = -EINVAL; > > + > > + /* Currently support only NL4_NETADDR source server */ > > + if (nss->nl4_type != NL4_NETADDR) { > > + WARN(nss->nl4_type != NL4_NETADDR, > > + "nfsd4_copy src server not NL4_NETADDR\n"); > > Won't nfsd4_decode_nl4_server actually let through NL4_NAME and NL4_URL? Yes. I think the logic would be not to limit the xdr functionality from not parsing it as if the support in the main code the xdr code doesn't change. > That would make this WARN() triggerable by a client--that's bad. Why? Would you rather it silently failed? > > + goto out_err; > > + } > > + > > + naddr = &nss->u.nl4_addr; > > + > > + tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, > > + naddr->addr_len, > > + (struct sockaddr *)&tmp_addr, > > + sizeof(tmp_addr)); > > + if (tmp_addrlen == 0) > > + goto out_err; > > + > > + if (tmp_addr.ss_family == AF_INET6) { > > + startsep = "["; > > + endsep = "]"; > > + match_netid = "tcp6"; > > + match_netid_len = 4; > > + } > > + > > + if (naddr->netid_len != match_netid_len || > > + strncmp(naddr->netid, match_netid, naddr->netid_len)) > > Just strcmp(naddr->netid, match_netid) would do the job. Will change. > > + goto out_err; > > + > > + /* Construct the raw data for the vfs_kern_mount call */ > > + len = RPC_MAX_ADDRBUFLEN + 1; > > + ipaddr = kzalloc(len, GFP_KERNEL); > > + if (!ipaddr) > > + goto out_err; > > + > > + rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len); > > + > > + /* 2 for ipv6 endsep and startsep. 
3 for ":/" and trailing '/0'*/ > > + ipaddr2 = kzalloc(len + 5, GFP_KERNEL); > > + if (!ipaddr2) > > + goto out_free_ipaddr; > > + > > + rpc_ntop((struct sockaddr *)&rqstp->rq_daddr, ipaddr2, len + 5); > > Replace the above by two calls to a function that does kmalloc+rpcntop? > (Though actually I don't think we need ipaddr.) Will do. > > + > > + raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr) + > > + strlen(ipaddr2); > > + raw_data = kzalloc(raw_len, GFP_KERNEL); > > + if (!raw_data) > > + goto out_free_ipaddr2; > > + > > + snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr, > > + ipaddr2); > > + > > + status = -ENODEV; > > + type = get_fs_type("nfs"); > > + if (!type) > > + goto out_free_rawdata; > > I believe you also need a put_filesystem after this. (e.g. see > kernel/trace/trace.c:trace_automount().) Got it. Thanks. > > > + > > + /* Set the server:<export> for the vfs_kerne_mount call */ > > + memset(ipaddr2, 0, len + 5); > > + snprintf(ipaddr2, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); > > + > > + dprintk("%s Raw mount data: %s server:export %s\n", __func__, > > + raw_data, ipaddr2); > > + > > + /* Use an 'internal' mount: MS_KERNMOUNT -> MNT_INTERNAL */ > > + ss_mnt = vfs_kern_mount(type, MS_KERNMOUNT, ipaddr2, raw_data); > > + if (IS_ERR(ss_mnt)) { > > + status = PTR_ERR(ss_mnt); > > + goto out_free_rawdata; > > + } > > + > > Let's combine the successful and failure cases, so the below should be > something like: > > out_free_rawdata: > kfree(raw_data); > out_free_ipaddr2: > kfree(ipaddr2); > out_free_ipaddr: > kfree(ipaddr); > out_err: > if (IS_ERR(ret)) > dprintk("--> %s ERROR %d\n", __func__, status); > return ret; Ok will do. 
> > > > + kfree(raw_data); > > + kfree(ipaddr2); > > + kfree(ipaddr); > > + > > + return ss_mnt; > > + > > +out_free_rawdata: > > + kfree(raw_data); > > +out_free_ipaddr2: > > + kfree(ipaddr2); > > +out_free_ipaddr: > > + kfree(ipaddr); > > +out_err: > > + dprintk("--> %s ERROR %d\n", __func__, status); > > + return ERR_PTR(status); > > +} > > + > > +static void > > +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) > > +{ > > + nfs_sb_deactive(ss_mnt->mnt_sb); > > + mntput(ss_mnt); > > +} > > + > > +/** > > + * nfsd4_setup_inter_ssc > > + * > > + * Verify COPY destination stateid. > > + * Connect to the source server with NFSv4.1. > > + * Create the source struct file for nfsd_copy_range. > > + * Called with COPY cstate: > > + * SAVED_FH: source filehandle > > + * CURRENT_FH: destination filehandle > > + * > > + * Returns errno (not nfserrxxx) > > + */ > > +static struct vfsmount * > > +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, > > + struct nfsd4_compound_state *cstate, > > + struct nfsd4_copy *copy) > > +{ > > + struct svc_fh *s_fh = NULL; > > + stateid_t *s_stid = ©->cp_src_stateid; > > + struct vfsmount *ss_mnt; > > + __be32 status; > > + > > + /* Verify the destination stateid and set dst struct file*/ > > + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > + ©->cp_dst_stateid, > > + WR_STATE, ©->file_dst, NULL, > > + NULL); > > + if (status) { > > + ss_mnt = ERR_PTR(be32_to_cpu(status)); > > That looks wrong. I don't think IS_ERR() is going to be true for that > value. > > If we need to return either an nfserr or a pointer, best is probably to > have the function return __be32 and have the pointer returned in an > argument. > > (Thought I notice the only caller ignores the error value, I wonder if > that's right.) > > > + goto out; > > + } > > + > > + ss_mnt = nfsd4_interssc_connect(copy->cp_src, rqstp); > > + if (IS_ERR(ss_mnt)) > > + goto out; > > So this function can return -ERRNO, or nfserr_*, or a pointer? 
That > won't work. Ok I'll change the function to return the __be32 always. And return pointer will be one of the args. > > > + s_fh = &cstate->save_fh; > > + > > + copy->c_fh.size = s_fh->fh_handle.fh_size; > > + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); > > + copy->stateid.seqid = s_stid->si_generation; > > + memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, > > + sizeof(stateid_opaque_t)); > > + > > +out: > > + return ss_mnt; > > +} > > + > > +static void > > +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *src, > > + struct file *dst) > > +{ > > + nfs42_ssc_close(src); > > + fput(src); > > + fput(dst); > > + mntput(ss_mnt); > > +} > > + > > +#else /* CONFIG_NFSD_V4_2_INTER_SSC */ > > + > > +static struct vfsmount * > > +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, > > + struct nfsd4_compound_state *cstate, > > + struct nfsd4_copy *copy) > > +{ > > + return ERR_PTR(-EINVAL); > > I wonder if that's really the right error for the > server-to-server-copy-unsupported case. Should be not_supported because COPY itself is not supported. If COPY was supported but failed for whatever reason we couldn't mount then the error should be OFFLOAD_DENIED. 
> > > +} > > + > > +static void > > +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *src, > > + struct file *dst) > > +{ > > +} > > + > > +static void > > +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) > > +{ > > +} > > + > > +static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, > > + struct nfs_fh *src_fh, > > + nfs4_stateid *stateid) > > +{ > > + return NULL; > > +} > > +#endif /* CONFIG_NFSD_V4_2_INTER_SSC */ > > + > > +static __be32 > > +nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, > > + struct nfsd4_compound_state *cstate, > > + struct nfsd4_copy *copy) > > +{ > > + return nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, > > + ©->file_src, ©->cp_dst_stateid, > > + ©->file_dst, NULL); > > +} > > + > > +static void > > +nfsd4_cleanup_intra_ssc(struct file *src, struct file *dst) > > +{ > > + fput(src); > > + fput(dst); > > +} > > > > static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) > > { > > @@ -1217,12 +1440,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) > > status = nfs_ok; > > } > > > > - fput(copy->file_src); > > - fput(copy->file_dst); > > + if (copy->cp_src) /* Inter server SSC */ > > + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->file_src, > > + copy->file_dst); > > + else > > + nfsd4_cleanup_intra_ssc(copy->file_src, copy->file_dst); > > + > > return status; > > } > > > > -static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > > +static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > > { > > dst->cp_src_pos = src->cp_src_pos; > > dst->cp_dst_pos = src->cp_dst_pos; > > @@ -1232,8 +1459,21 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > > memcpy(&dst->fh, &src->fh, sizeof(src->fh)); > > dst->cp_clp = src->cp_clp; > > dst->file_dst = get_file(src->file_dst); > > - dst->file_src = get_file(src->file_src); > > + if (!src->cp_src) /* for inter, file_src doesnt exist yet */ > > + dst->file_src = 
get_file(src->file_src); > > memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); > > + if (src->cp_src) { > > + dst->cp_src = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); > > + if (!dst->cp_src) > > + return -ENOMEM; > > + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); > > + } > > + memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); > > + memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); > > + dst->ss_mnt = src->ss_mnt; > > + > > + return 0; > > + > > } > > > > static void cleanup_async_copy(struct nfsd4_copy *copy) > > @@ -1244,6 +1484,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) > > spin_lock(©->cp_clp->async_lock); > > list_del(©->copies); > > spin_unlock(©->cp_clp->async_lock); > > + kfree(copy->cp_src); > > nfs4_put_copy(copy); > > } > > > > @@ -1252,7 +1493,18 @@ static int nfsd4_do_async_copy(void *data) > > struct nfsd4_copy *copy = (struct nfsd4_copy *)data; > > struct nfsd4_copy *cb_copy; > > > > + if (copy->cp_src) { /* Inter server SSC */ > > + copy->file_src = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, > > + ©->stateid); > > + if (IS_ERR(copy->file_src)) { > > + copy->nfserr = nfserr_offload_denied; > > + nfsd4_interssc_disconnect(copy->ss_mnt); > > + goto do_callback; > > + } > > + } > > + > > copy->nfserr = nfsd4_do_copy(copy, 0); > > +do_callback: > > cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); > > if (!cb_copy) > > goto out; > > @@ -1276,11 +1528,19 @@ static int nfsd4_do_async_copy(void *data) > > __be32 status; > > struct nfsd4_copy *async_copy = NULL; > > > > - status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, > > - ©->file_src, ©->cp_dst_stateid, > > - ©->file_dst, NULL); > > - if (status) > > - goto out; > > + if (copy->cp_src) { /* Inter server SSC */ > > + if (!inter_copy_offload_enable || copy->cp_synchronous) { > > + status = nfserr_notsupp; > > + goto out; > > + } > > + copy->ss_mnt = nfsd4_setup_inter_ssc(rqstp, cstate, copy); > > + if (IS_ERR(copy->ss_mnt)) 
> > + return nfserr_offload_denied; > > We should check that this is the right error to return in all those > failure cases. Well once I change nfsd4_setup_inter_ssc() to return an error itself it'll just return status. But I'll double check the error returns. > That's all I have for now. Thank you for the reviews. I'm working on the next version. But in addition to this, I need the VFS piece with this patch series now because server piece needs the generic cross filesystem copy_file_range() support via do_splice because the server reads out of NFS and writes into the local file system. > > --b. > > > + } else { > > + status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); > > + if (status) > > + return status; > > + } > > > > copy->cp_clp = cstate->clp; > > memcpy(©->fh, &cstate->current_fh.fh_handle, > > @@ -1291,15 +1551,15 @@ static int nfsd4_do_async_copy(void *data) > > status = nfserrno(-ENOMEM); > > async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); > > if (!async_copy) > > - goto out; > > - if (!nfs4_init_cp_state(nn, copy)) { > > - kfree(async_copy); > > - goto out; > > - } > > + goto out_err; > > + if (!nfs4_init_cp_state(nn, copy)) > > + goto out_err; > > refcount_set(&async_copy->refcount, 1); > > memcpy(©->cp_res.cb_stateid, ©->cp_stateid, > > sizeof(copy->cp_stateid)); > > - dup_copy_fields(copy, async_copy); > > + status = dup_copy_fields(copy, async_copy); > > + if (status) > > + goto out_err; > > async_copy->copy_task = kthread_create(nfsd4_do_async_copy, > > async_copy, "%s", "copy thread"); > > if (IS_ERR(async_copy->copy_task)) > > @@ -1310,13 +1570,17 @@ static int nfsd4_do_async_copy(void *data) > > spin_unlock(&async_copy->cp_clp->async_lock); > > wake_up_process(async_copy->copy_task); > > status = nfs_ok; > > - } else > > + } else { > > status = nfsd4_do_copy(copy, 1); > > + } > > out: > > return status; > > out_err: > > cleanup_async_copy(async_copy); > > - goto out; > > + status = nfserrno(-ENOMEM); > > + if (copy->cp_src) > > + 
nfsd4_interssc_disconnect(copy->ss_mnt); > > + goto out_err; > > } > > > > struct nfsd4_copy * > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > > index 89cb484..9d254e7 100644 > > --- a/fs/nfsd/nfssvc.c > > +++ b/fs/nfsd/nfssvc.c > > @@ -30,6 +30,12 @@ > > > > #define NFSDDBG_FACILITY NFSDDBG_SVC > > > > +bool inter_copy_offload_enable; > > +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); > > +module_param(inter_copy_offload_enable, bool, 0644); > > +MODULE_PARM_DESC(inter_copy_offload_enable, > > + "Enable inter server to server copy offload. Default: false"); > > + > > extern struct svc_program nfsd_program; > > static int nfsd(void *vrqstp); > > > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index c98ef64..c7e3df1 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -546,7 +546,12 @@ struct nfsd4_copy { > > struct task_struct *copy_task; > > refcount_t refcount; > > bool stopped; > > + > > + struct vfsmount *ss_mnt; > > + struct nfs_fh c_fh; > > + nfs4_stateid stateid; > > }; > > +extern bool inter_copy_offload_enable; > > > > struct nfsd4_seek { > > /* request */ > > diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h > > index 4d76f87..e53a261 100644 > > --- a/include/linux/nfs4.h > > +++ b/include/linux/nfs4.h > > @@ -17,6 +17,7 @@ > > #include <linux/uidgid.h> > > #include <uapi/linux/nfs4.h> > > #include <linux/sunrpc/msg_prot.h> > > +#include <linux/nfs.h> > > > > enum nfs4_acl_whotype { > > NFS4_ACL_WHO_NAMED = 0, > > -- > > 1.8.3.1
On Thu, Nov 08, 2018 at 02:16:04PM -0500, Olga Kornievskaia wrote: > On Wed, Nov 7, 2018 at 4:49 PM J. Bruce Fields <bfields@fieldses.org> wrote: > > > > On Fri, Oct 19, 2018 at 11:29:05AM -0400, Olga Kornievskaia wrote: > > > From: Olga Kornievskaia <kolga@netapp.com> > > > > > > Given a universal address, mount the source server from the destination > > > server. Use an internal mount. Call the NFS client nfs42_ssc_open to > > > obtain the NFS struct file suitable for nfsd_copy_range. > > > > > > Ability to do "inter" server-to-server depends on the an nfsd kernel > > > parameter "inter_copy_offload_enabled". > > > > > > Signed-off-by: Andy Adamson <andros@netapp.com> > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com> > > > --- > > > fs/nfsd/nfs4proc.c | 298 ++++++++++++++++++++++++++++++++++++++++++++++++--- > > > fs/nfsd/nfssvc.c | 6 ++ > > > fs/nfsd/xdr4.h | 5 + > > > include/linux/nfs4.h | 1 + > > > 4 files changed, 293 insertions(+), 17 deletions(-) > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > index 59e9d0c..6dcd80c 100644 > > > --- a/fs/nfsd/nfs4proc.c > > > +++ b/fs/nfsd/nfs4proc.c > > > @@ -1153,6 +1153,229 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) > > > while ((copy = nfsd4_get_copy(clp)) != NULL) > > > nfsd4_stop_copy(copy); > > > } > > > +#ifdef CONFIG_NFSD_V4_2_INTER_SSC > > > + > > > +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, > > > + struct nfs_fh *src_fh, > > > + nfs4_stateid *stateid); > > > +extern void nfs42_ssc_close(struct file *filep); > > > + > > > +extern void nfs_sb_deactive(struct super_block *sb); > > > + > > > +#define NFSD42_INTERSSC_MOUNTOPS "minorversion=2,vers=4,addr=%s,clientaddr=%s" > > > > The nfs man page says "clientaddr=" has no effect on 4.2 mounts. > > I only have nfs man page from RHEL7.5 and I don't see that. 
From nfs-utils/utils/mount/nfs.man: NFS protocol versions 4.1 and 4.2 use the client-established TCP connection for callback requests, so do not require the server to connect to the client. This option is therefore only affect NFS version 4.0 mounts. (Maybe I should send a patch for that "is therefore" typo.) > > Also, what's the "addr=" option for, isn't the server address already > > given in the mount string? (Honest question, I may be wrong here.) > > I believe going thru the kernel vfs_kern_mount() we need to specify > "addr=" otherwise it doesn't know which server to mount. Yeah, now that I think of it I guess the kernel hasn't traditionally done DNS resolution so of course there'd have to be something like this. OK. > > > + > > > +/** > > > + * Support one copy source server for now. > > > + */ > > > +static struct vfsmount * > > > +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp) > > > +{ > > > + struct file_system_type *type; > > > + struct vfsmount *ss_mnt; > > > + struct nfs42_netaddr *naddr; > > > + struct sockaddr_storage tmp_addr; > > > + size_t tmp_addrlen, match_netid_len = 3; > > > + char *startsep = "", *endsep = "", *match_netid = "tcp"; > > > + char *ipaddr, *ipaddr2, *raw_data; > > > + int len, raw_len, status = -EINVAL; > > > + > > > + /* Currently support only NL4_NETADDR source server */ > > > + if (nss->nl4_type != NL4_NETADDR) { > > > + WARN(nss->nl4_type != NL4_NETADDR, > > > + "nfsd4_copy src server not NL4_NETADDR\n"); > > > > Won't nfsd4_decode_nl4_server actually let through NL4_NAME and NL4_URL? > > Yes. I think the logic would be not to limit the xdr functionality > from not parsing it as if the support in the main code the xdr code > doesn't change. I think it would be simplest just to return the right error from nfsd4_decode_nl4_server() in the NL4_NAME/NL4_URL cases. > > That would make this WARN() triggerable by a client--that's bad. > > Why? Would you rather it silently failed? 
Returning an error would be fine. But it should never be possible for an ordinary user or somebody on the network to trigger a WARN() or a BUG(). Those should be reserved for things that we assume never happen (so they indicate that our assumptions are wrong, hence we have a possible kernel bug). > Thank you for the reviews. I'm working on the next version. But in > addition to this, I need the VFS piece with this patch series now > because server piece needs the generic cross filesystem > copy_file_range() support via do_splice because the server reads out > of NFS and writes into the local file system. OK. In addition to mailing the patches it might also be useful if you could point me to a git branch somewhere just to make sure I've got all the right prerequisites. --b.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 59e9d0c..6dcd80c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1153,6 +1153,229 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) while ((copy = nfsd4_get_copy(clp)) != NULL) nfsd4_stop_copy(copy); } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid); +extern void nfs42_ssc_close(struct file *filep); + +extern void nfs_sb_deactive(struct super_block *sb); + +#define NFSD42_INTERSSC_MOUNTOPS "minorversion=2,vers=4,addr=%s,clientaddr=%s" + +/** + * Support one copy source server for now. + */ +static struct vfsmount * +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp) +{ + struct file_system_type *type; + struct vfsmount *ss_mnt; + struct nfs42_netaddr *naddr; + struct sockaddr_storage tmp_addr; + size_t tmp_addrlen, match_netid_len = 3; + char *startsep = "", *endsep = "", *match_netid = "tcp"; + char *ipaddr, *ipaddr2, *raw_data; + int len, raw_len, status = -EINVAL; + + /* Currently support only NL4_NETADDR source server */ + if (nss->nl4_type != NL4_NETADDR) { + WARN(nss->nl4_type != NL4_NETADDR, + "nfsd4_copy src server not NL4_NETADDR\n"); + goto out_err; + } + + naddr = &nss->u.nl4_addr; + + tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, + naddr->addr_len, + (struct sockaddr *)&tmp_addr, + sizeof(tmp_addr)); + if (tmp_addrlen == 0) + goto out_err; + + if (tmp_addr.ss_family == AF_INET6) { + startsep = "["; + endsep = "]"; + match_netid = "tcp6"; + match_netid_len = 4; + } + + if (naddr->netid_len != match_netid_len || + strncmp(naddr->netid, match_netid, naddr->netid_len)) + goto out_err; + + /* Construct the raw data for the vfs_kern_mount call */ + len = RPC_MAX_ADDRBUFLEN + 1; + ipaddr = kzalloc(len, GFP_KERNEL); + if (!ipaddr) + goto out_err; + + rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len); + + /* 2 for ipv6 endsep and startsep. 
3 for ":/" and trailing '/0'*/ + ipaddr2 = kzalloc(len + 5, GFP_KERNEL); + if (!ipaddr2) + goto out_free_ipaddr; + + rpc_ntop((struct sockaddr *)&rqstp->rq_daddr, ipaddr2, len + 5); + + raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr) + + strlen(ipaddr2); + raw_data = kzalloc(raw_len, GFP_KERNEL); + if (!raw_data) + goto out_free_ipaddr2; + + snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr, + ipaddr2); + + status = -ENODEV; + type = get_fs_type("nfs"); + if (!type) + goto out_free_rawdata; + + /* Set the server:<export> for the vfs_kerne_mount call */ + memset(ipaddr2, 0, len + 5); + snprintf(ipaddr2, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); + + dprintk("%s Raw mount data: %s server:export %s\n", __func__, + raw_data, ipaddr2); + + /* Use an 'internal' mount: MS_KERNMOUNT -> MNT_INTERNAL */ + ss_mnt = vfs_kern_mount(type, MS_KERNMOUNT, ipaddr2, raw_data); + if (IS_ERR(ss_mnt)) { + status = PTR_ERR(ss_mnt); + goto out_free_rawdata; + } + + kfree(raw_data); + kfree(ipaddr2); + kfree(ipaddr); + + return ss_mnt; + +out_free_rawdata: + kfree(raw_data); +out_free_ipaddr2: + kfree(ipaddr2); +out_free_ipaddr: + kfree(ipaddr); +out_err: + dprintk("--> %s ERROR %d\n", __func__, status); + return ERR_PTR(status); +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ + nfs_sb_deactive(ss_mnt->mnt_sb); + mntput(ss_mnt); +} + +/** + * nfsd4_setup_inter_ssc + * + * Verify COPY destination stateid. + * Connect to the source server with NFSv4.1. + * Create the source struct file for nfsd_copy_range. 
+ * Called with COPY cstate: + * SAVED_FH: source filehandle + * CURRENT_FH: destination filehandle + * + * Returns errno (not nfserrxxx) + */ +static struct vfsmount * +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy) +{ + struct svc_fh *s_fh = NULL; + stateid_t *s_stid = &copy->cp_src_stateid; + struct vfsmount *ss_mnt; + __be32 status; + + /* Verify the destination stateid and set dst struct file*/ + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &copy->cp_dst_stateid, + WR_STATE, &copy->file_dst, NULL, + NULL); + if (status) { + ss_mnt = ERR_PTR(be32_to_cpu(status)); + goto out; + } + + ss_mnt = nfsd4_interssc_connect(copy->cp_src, rqstp); + if (IS_ERR(ss_mnt)) + goto out; + + s_fh = &cstate->save_fh; + + copy->c_fh.size = s_fh->fh_handle.fh_size; + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + copy->stateid.seqid = s_stid->si_generation; + memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, + sizeof(stateid_opaque_t)); + +out: + return ss_mnt; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *src, + struct file *dst) +{ + nfs42_ssc_close(src); + fput(src); + fput(dst); + mntput(ss_mnt); +} + +#else /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static struct vfsmount * +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy) +{ + return ERR_PTR(-EINVAL); +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *src, + struct file *dst) +{ +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ +} + +static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid) +{ + return NULL; +} +#endif /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy) +{ + return nfsd4_verify_copy(rqstp, cstate,
&copy->cp_src_stateid, + &copy->file_src, &copy->cp_dst_stateid, + &copy->file_dst, NULL); +} + +static void +nfsd4_cleanup_intra_ssc(struct file *src, struct file *dst) +{ + fput(src); + fput(dst); +} static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { @@ -1217,12 +1440,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) status = nfs_ok; } - fput(copy->file_src); - fput(copy->file_dst); + if (copy->cp_src) /* Inter server SSC */ + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->file_src, + copy->file_dst); + else + nfsd4_cleanup_intra_ssc(copy->file_src, copy->file_dst); + return status; } -static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) { dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; @@ -1232,8 +1459,21 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->file_dst = get_file(src->file_dst); - dst->file_src = get_file(src->file_src); + if (!src->cp_src) /* for inter, file_src doesnt exist yet */ + dst->file_src = get_file(src->file_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); + if (src->cp_src) { + dst->cp_src = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); + if (!dst->cp_src) + return -ENOMEM; + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); + } + memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); + memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); + dst->ss_mnt = src->ss_mnt; + + return 0; + } static void cleanup_async_copy(struct nfsd4_copy *copy) @@ -1244,6 +1484,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) spin_lock(&copy->cp_clp->async_lock); list_del(&copy->copies); spin_unlock(&copy->cp_clp->async_lock); + kfree(copy->cp_src); nfs4_put_copy(copy); } @@ -1252,7 +1493,18 @@ static int nfsd4_do_async_copy(void *data) struct nfsd4_copy *copy = (struct nfsd4_copy
*)data; struct nfsd4_copy *cb_copy; + if (copy->cp_src) { /* Inter server SSC */ + copy->file_src = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh, + &copy->stateid); + if (IS_ERR(copy->file_src)) { + copy->nfserr = nfserr_offload_denied; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + } + copy->nfserr = nfsd4_do_copy(copy, 0); +do_callback: cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!cb_copy) goto out; @@ -1276,11 +1528,19 @@ static int nfsd4_do_async_copy(void *data) __be32 status; struct nfsd4_copy *async_copy = NULL; - status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, - &copy->file_src, &copy->cp_dst_stateid, - &copy->file_dst, NULL); - if (status) - goto out; + if (copy->cp_src) { /* Inter server SSC */ + if (!inter_copy_offload_enable || copy->cp_synchronous) { + status = nfserr_notsupp; + goto out; + } + copy->ss_mnt = nfsd4_setup_inter_ssc(rqstp, cstate, copy); + if (IS_ERR(copy->ss_mnt)) + return nfserr_offload_denied; + } else { + status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); + if (status) + return status; + } copy->cp_clp = cstate->clp; memcpy(&copy->fh, &cstate->current_fh.fh_handle, @@ -1291,15 +1551,15 @@ static int nfsd4_do_async_copy(void *data) status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) - goto out; - if (!nfs4_init_cp_state(nn, copy)) { - kfree(async_copy); - goto out; - } + goto out_err; + if (!nfs4_init_cp_state(nn, copy)) + goto out_err; refcount_set(&async_copy->refcount, 1); memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid, sizeof(copy->cp_stateid)); - dup_copy_fields(copy, async_copy); + status = dup_copy_fields(copy, async_copy); + if (status) + goto out_err; async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) @@ -1310,13 +1570,17 @@ static int nfsd4_do_async_copy(void *data) spin_unlock(&async_copy->cp_clp->async_lock); wake_up_process(async_copy->copy_task); status = nfs_ok; - }
else + } else { status = nfsd4_do_copy(copy, 1); + } out: return status; out_err: cleanup_async_copy(async_copy); - goto out; + status = nfserrno(-ENOMEM); + if (copy->cp_src) + nfsd4_interssc_disconnect(copy->ss_mnt); + goto out_err; } struct nfsd4_copy * diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 89cb484..9d254e7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -30,6 +30,12 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC +bool inter_copy_offload_enable; +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index c98ef64..c7e3df1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -546,7 +546,12 @@ struct nfsd4_copy { struct task_struct *copy_task; refcount_t refcount; bool stopped; + + struct vfsmount *ss_mnt; + struct nfs_fh c_fh; + nfs4_stateid stateid; }; +extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4d76f87..e53a261 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -17,6 +17,7 @@ #include <linux/uidgid.h> #include <uapi/linux/nfs4.h> #include <linux/sunrpc/msg_prot.h> +#include <linux/nfs.h> enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0,