Message ID | b750dd468dd3fe4af8febf3a0bf8bc278ca7c05e.1693400242.git.lorenzo@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | add rpc_status netlink support for NFSD | expand |
On Wed, Aug 30, 2023 at 03:05:46PM +0200, Lorenzo Bianconi wrote: > Introduce rpc_status netlink support for NFSD in order to dump pending > RPC requests debugging information from userspace. Very good to see this update! netdev has asked that all new netlink protocols start from a YAML spec that resides under Documentation/netlink/specs/. That spec is then used to generate netlink parser code for the kernel and for user-space tooling. You can find this all described here: https://docs.kernel.org/next/userspace-api/netlink/specs.html and here is a weak example of how this might be done: https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/net/handshake?id=3b3009ea8abb713b022d94fba95ec270cf6e7eae I say weak because I did that work while the YAML spec tools were still under development. It might not completely reflect how this needs to be done today. So the YAML file would be named something like: Documentation/netlink/specs/nfsd.yaml and it would generate files "fs/nfsd/netlink.[ch]". It should generate a lot of the parser boilerplate you've written below by hand, so just replace that code with calls to the generated code. When you post the next revision of the series, cc: netdev. 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > --- > fs/nfsd/nfsctl.c | 275 +++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfsd.h | 19 +++ > fs/nfsd/nfssvc.c | 15 ++ > fs/nfsd/state.h | 2 - > include/linux/sunrpc/svc.h | 1 + > include/uapi/linux/nfs.h | 54 ++++++++ > 6 files changed, 364 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 33f80d289d63..4626a0002ceb 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -17,6 +17,9 @@ > #include <linux/sunrpc/rpc_pipe_fs.h> > #include <linux/module.h> > #include <linux/fsnotify.h> > +#include <net/genetlink.h> > +#include <net/ip.h> > +#include <net/ipv6.h> > > #include "idmap.h" > #include "nfsd.h" > @@ -1495,6 +1498,273 @@ static int create_proc_exports_entry(void) > > unsigned int nfsd_net_id; > > +/* the netlink family */ > +static struct genl_family nfsd_genl; > + > +static const struct nla_policy > +nfsd_rpc_status_compound_policy[NFS_ATTR_RPC_STATUS_COMPOUND_MAX + 1] = { > + [NFS_ATTR_RPC_STATUS_COMPOUND_OP] = { .type = NLA_STRING }, > +}; > + > +static const struct nla_policy > +nfsd_rpc_status_policy[NFS_ATTR_RPC_STATUS_MAX + 1] = { > + [NFS_ATTR_RPC_STATUS_XID] = { .type = NLA_U32 }, > + [NFS_ATTR_RPC_STATUS_FLAGS] = { .type = NLA_U32 }, > + [NFS_ATTR_RPC_STATUS_PC_NAME] = { .type = NLA_STRING }, > + [NFS_ATTR_RPC_STATUS_VERSION] = { .type = NLA_U8 }, > + [NFS_ATTR_RPC_STATUS_STIME] = { .type = NLA_S64 }, > + [NFS_ATTR_RPC_STATUS_SADDR4] = { .len = sizeof_field(struct iphdr, saddr) }, > + [NFS_ATTR_RPC_STATUS_DADDR4] = { .len = sizeof_field(struct iphdr, daddr) }, > + [NFS_ATTR_RPC_STATUS_SADDR6] = { .len = sizeof_field(struct ipv6hdr, saddr) }, > + [NFS_ATTR_RPC_STATUS_DADDR6] = { .len = sizeof_field(struct ipv6hdr, daddr) }, > + [NFS_ATTR_RPC_STATUS_SPORT] = { .type = NLA_U16 }, > + [NFS_ATTR_RPC_STATUS_DPORT] = { .type = NLA_U16 }, > + [NFS_ATTR_RPC_STATUS_COMPOUND] = > + NLA_POLICY_NESTED_ARRAY(nfsd_rpc_status_compound_policy), > +}; > + > 
+static const struct nla_policy > +nfsd_genl_policy[NFS_ATTR_MAX + 1] = { > + [NFS_ATTR_RPC_STATUS] = NLA_POLICY_NESTED_ARRAY(nfsd_rpc_status_policy), > +}; > + > +static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, int index, > + struct nfsd_genl_rqstp *rqstp) > +{ > + struct nlattr *rq_attr, *comp_attr; > + int i; > + > + rq_attr = nla_nest_start(skb, index); > + if (!rq_attr) > + return -ENOBUFS; > + > + if (nla_put_be32(skb, NFS_ATTR_RPC_STATUS_XID, rqstp->rq_xid) || > + nla_put_u32(skb, NFS_ATTR_RPC_STATUS_FLAGS, rqstp->rq_flags) || > + nla_put_string(skb, NFS_ATTR_RPC_STATUS_PC_NAME, rqstp->pc_name) || > + nla_put_u8(skb, NFS_ATTR_RPC_STATUS_VERSION, rqstp->rq_vers) || > + nla_put_s64(skb, NFS_ATTR_RPC_STATUS_STIME, > + ktime_to_us(rqstp->rq_stime), NFS_ATTR_RPC_STATUS_PAD)) > + return -ENOBUFS; > + > + switch (rqstp->saddr.sa_family) { > + case AF_INET: { > + const struct sockaddr_in *s_in, *d_in; > + > + s_in = (const struct sockaddr_in *)&rqstp->saddr; > + d_in = (const struct sockaddr_in *)&rqstp->daddr; > + if (nla_put_in_addr(skb, NFS_ATTR_RPC_STATUS_SADDR4, > + s_in->sin_addr.s_addr) || > + nla_put_in_addr(skb, NFS_ATTR_RPC_STATUS_DADDR4, > + d_in->sin_addr.s_addr) || > + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_SPORT, > + s_in->sin_port) || > + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_DPORT, > + d_in->sin_port)) > + return -ENOBUFS; > + break; > + } > + case AF_INET6: { > + const struct sockaddr_in6 *s_in, *d_in; > + > + s_in = (const struct sockaddr_in6 *)&rqstp->saddr; > + d_in = (const struct sockaddr_in6 *)&rqstp->daddr; > + if (nla_put_in6_addr(skb, NFS_ATTR_RPC_STATUS_SADDR6, > + &s_in->sin6_addr) || > + nla_put_in6_addr(skb, NFS_ATTR_RPC_STATUS_DADDR6, > + &d_in->sin6_addr) || > + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_SPORT, > + s_in->sin6_port) || > + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_DPORT, > + d_in->sin6_port)) > + return -ENOBUFS; > + break; > + } > + default: > + break; > + } > + > + comp_attr = nla_nest_start(skb, 
NFS_ATTR_RPC_STATUS_COMPOUND); > + if (!comp_attr) > + return -ENOBUFS; > + > + for (i = 0; i < rqstp->opcnt; i++) { > + struct nlattr *op_attr; > + > + op_attr = nla_nest_start(skb, i); > + if (!op_attr) > + return -ENOBUFS; > + > + if (nla_put_string(skb, NFS_ATTR_RPC_STATUS_COMPOUND_OP, > + nfsd4_op_name(rqstp->opnum[i]))) > + return -ENOBUFS; > + > + nla_nest_end(skb, op_attr); > + } > + > + nla_nest_end(skb, comp_attr); > + nla_nest_end(skb, rq_attr); > + > + return 0; > +} > + > +static int nfsd_genl_get_rpc_status(struct sk_buff *skb, struct genl_info *info) > +{ > + struct nfsd_net *nn = net_generic(genl_info_net(info), nfsd_net_id); > + struct nlattr *rpc_attr; > + int i, rqstp_index = 0; > + struct sk_buff *msg; > + void *hdr; > + > + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); > + if (!msg) > + return -ENOMEM; > + > + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &nfsd_genl, > + 0, NFS_CMD_NEW_RPC_STATUS); > + if (!hdr) { > + nlmsg_free(msg); > + return -ENOBUFS; > + } > + > + rpc_attr = nla_nest_start(msg, NFS_ATTR_RPC_STATUS); > + if (!rpc_attr) > + goto nla_put_failure; > + > + rcu_read_lock(); > + > + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) { > + struct svc_rqst *rqstp; > + > + list_for_each_entry_rcu(rqstp, > + &nn->nfsd_serv->sv_pools[i].sp_all_threads, > + rq_all) { > + struct nfsd_genl_rqstp genl_rqstp; > + unsigned int status_counter; > + > + /* > + * Acquire rq_status_counter before parsing the rqst > + * fields. rq_status_counter is set to an odd value in > + * order to notify the consumers the rqstp fields are > + * meaningful. 
> + */ > + status_counter = > + smp_load_acquire(&rqstp->rq_status_counter); > + if (!(status_counter & 1)) > + continue; > + > + genl_rqstp.rq_xid = rqstp->rq_xid; > + genl_rqstp.rq_flags = rqstp->rq_flags; > + genl_rqstp.rq_vers = rqstp->rq_vers; > + genl_rqstp.pc_name = svc_proc_name(rqstp); > + genl_rqstp.rq_stime = rqstp->rq_stime; > + genl_rqstp.opcnt = 0; > + memcpy(&genl_rqstp.daddr, svc_daddr(rqstp), > + sizeof(struct sockaddr)); > + memcpy(&genl_rqstp.saddr, svc_addr(rqstp), > + sizeof(struct sockaddr)); > + > +#ifdef CONFIG_NFSD_V4 > + if (rqstp->rq_vers == NFS4_VERSION && > + rqstp->rq_proc == NFSPROC4_COMPOUND) { > + /* NFSv4 compund */ > + struct nfsd4_compoundargs *args; > + int j; > + > + args = rqstp->rq_argp; > + genl_rqstp.opcnt = args->opcnt; > + for (j = 0; j < genl_rqstp.opcnt; j++) > + genl_rqstp.opnum[j] = > + args->ops[j].opnum; > + } > +#endif /* CONFIG_NFSD_V4 */ > + > + /* > + * Acquire rq_status_counter before reporting the rqst > + * fields to the user. > + */ > + if (smp_load_acquire(&rqstp->rq_status_counter) != > + status_counter) > + continue; > + > + if (nfsd_genl_rpc_status_compose_msg(msg, > + rqstp_index++, > + &genl_rqstp)) > + goto nla_put_failure_rcu; > + } > + } > + > + rcu_read_unlock(); > + > + nla_nest_end(msg, rpc_attr); > + genlmsg_end(msg, hdr); > + > + return genlmsg_reply(msg, info); > + > +nla_put_failure_rcu: > + rcu_read_unlock(); > +nla_put_failure: > + genlmsg_cancel(msg, hdr); > + nlmsg_free(msg); > + > + return -EMSGSIZE; > +} > + > +static int nfsd_genl_pre_doit(const struct genl_split_ops *ops, > + struct sk_buff *skb, struct genl_info *info) > +{ > + struct nfsd_net *nn = net_generic(genl_info_net(info), nfsd_net_id); > + > + if (ops->internal_flags & NFSD_FLAG_NEED_REF_COUNT) { > + int ret = -ENODEV; > + > + mutex_lock(&nfsd_mutex); > + if (nn->nfsd_serv) { > + svc_get(nn->nfsd_serv); > + ret = 0; > + } > + mutex_unlock(&nfsd_mutex); > + > + return ret; > + } > + > + return 0; > +} > + > +static void 
nfsd_genl_post_doit(const struct genl_split_ops *ops, > + struct sk_buff *skb, struct genl_info *info) > +{ > + if (ops->internal_flags & NFSD_FLAG_NEED_REF_COUNT) { > + mutex_lock(&nfsd_mutex); > + nfsd_put(genl_info_net(info)); > + mutex_unlock(&nfsd_mutex); > + } > +} > + > +static struct genl_small_ops nfsd_genl_ops[] = { > + { > + .cmd = NFS_CMD_GET_RPC_STATUS, > + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, > + .doit = nfsd_genl_get_rpc_status, > + .internal_flags = NFSD_FLAG_NEED_REF_COUNT, > + }, > +}; > + > +static struct genl_family nfsd_genl __ro_after_init = { > + .name = "nfsd_server", > + .version = 1, > + .maxattr = NFS_ATTR_MAX, > + .module = THIS_MODULE, > + .netnsok = true, > + .parallel_ops = true, > + .hdrsize = 0, > + .pre_doit = nfsd_genl_pre_doit, > + .post_doit = nfsd_genl_post_doit, > + .policy = nfsd_genl_policy, > + .small_ops = nfsd_genl_ops, > + .n_small_ops = ARRAY_SIZE(nfsd_genl_ops), > + .resv_start_op = NFS_CMD_NEW_RPC_STATUS + 1, > +}; > + > /** > * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace > * @net: a freshly-created network namespace > @@ -1589,6 +1859,10 @@ static int __init init_nfsd(void) > retval = register_filesystem(&nfsd_fs_type); > if (retval) > goto out_free_all; > + retval = genl_register_family(&nfsd_genl); > + if (retval) > + goto out_free_all; > + > return 0; > out_free_all: > nfsd4_destroy_laundry_wq(); > @@ -1613,6 +1887,7 @@ static int __init init_nfsd(void) > > static void __exit exit_nfsd(void) > { > + genl_unregister_family(&nfsd_genl); > unregister_filesystem(&nfsd_fs_type); > nfsd4_destroy_laundry_wq(); > unregister_cld_notifier(); > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index e95c3322eb9b..749c871b3291 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -62,6 +62,25 @@ struct readdir_cd { > __be32 err; /* 0, nfserr, or nfserr_eof */ > }; > > +enum nfsd_genl_internal_flag { > + NFSD_FLAG_NEED_REF_COUNT = BIT(0), > +}; > + > +/* Maximum number of 
operations per session compound */ > +#define NFSD_MAX_OPS_PER_COMPOUND 50 > + > +struct nfsd_genl_rqstp { > + struct sockaddr daddr; > + struct sockaddr saddr; > + unsigned long rq_flags; > + const char *pc_name; > + ktime_t rq_stime; > + __be32 rq_xid; > + u32 rq_vers; > + /* NFSv4 compund */ > + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND]; > + u16 opcnt; > +}; > > extern struct svc_program nfsd_program; > extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index 1582af33e204..fad34a7325b3 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp) > if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) > goto out_decode_err; > > + /* > + * Release rq_status_counter setting it to an odd value after the rpc > + * request has been properly parsed. rq_status_counter is used to > + * notify the consumers if the rqstp fields are stable > + * (rq_status_counter is odd) or not meaningful (rq_status_counter > + * is even). > + */ > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1); > + > rp = NULL; > switch (nfsd_cache_lookup(rqstp, &rp)) { > case RC_DOIT: > @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp) > if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) > goto out_encode_err; > > + /* > + * Release rq_status_counter setting it to an even value after the rpc > + * request has been properly processed. > + */ > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); > + > nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1); > out_cached_reply: > return 1; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index cbddcf484dba..41bdc913fa71 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -174,8 +174,6 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) > > /* Maximum number of slots per session. 
160 is useful for long haul TCP */ > #define NFSD_MAX_SLOTS_PER_SESSION 160 > -/* Maximum number of operations per session compound */ > -#define NFSD_MAX_OPS_PER_COMPOUND 50 > /* Maximum session per slot cache size */ > #define NFSD_SLOT_CACHE_SIZE 2048 > /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > index dbf5b21feafe..caa20defd255 100644 > --- a/include/linux/sunrpc/svc.h > +++ b/include/linux/sunrpc/svc.h > @@ -251,6 +251,7 @@ struct svc_rqst { > * net namespace > */ > void ** rq_lease_breaker; /* The v4 client breaking a lease */ > + unsigned int rq_status_counter; /* RPC processing counter */ > }; > > /* bits for rq_flags */ > diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h > index 946cb62d64b0..86a5daaaf9d9 100644 > --- a/include/uapi/linux/nfs.h > +++ b/include/uapi/linux/nfs.h > @@ -132,4 +132,58 @@ enum nfs_ftype { > NFFIFO = 8 > }; > > +enum nfs_commands { > + NFS_CMD_UNSPEC, > + > + NFS_CMD_GET_RPC_STATUS, > + NFS_CMD_NEW_RPC_STATUS, > + > + /* add new commands above here */ > + > + __NFS_CMD_MAX, > + NFS_CMD_MAX = __NFS_CMD_MAX - 1, > +}; > + > +enum nfs_rcp_status_compound_attrs { > + __NFS_ATTR_RPC_STATUS_COMPOUND_UNSPEC, > + NFS_ATTR_RPC_STATUS_COMPOUND_OP, > + > + /* keep it last */ > + NUM_NFS_ATTR_RPC_STATUS_COMPOUND, > + NFS_ATTR_RPC_STATUS_COMPOUND_MAX = NUM_NFS_ATTR_RPC_STATUS_COMPOUND - 1, > +}; > + > +enum nfs_rpc_status_attrs { > + __NFS_ATTR_RPC_STATUS_UNSPEC, > + > + NFS_ATTR_RPC_STATUS_XID, > + NFS_ATTR_RPC_STATUS_FLAGS, > + NFS_ATTR_RPC_STATUS_PC_NAME, > + NFS_ATTR_RPC_STATUS_VERSION, > + NFS_ATTR_RPC_STATUS_STIME, > + NFS_ATTR_RPC_STATUS_SADDR4, > + NFS_ATTR_RPC_STATUS_DADDR4, > + NFS_ATTR_RPC_STATUS_SADDR6, > + NFS_ATTR_RPC_STATUS_DADDR6, > + NFS_ATTR_RPC_STATUS_SPORT, > + NFS_ATTR_RPC_STATUS_DPORT, > + NFS_ATTR_RPC_STATUS_PAD, > + NFS_ATTR_RPC_STATUS_COMPOUND, > + > + /* keep it last */ > + NUM_NFS_ATTR_RPC_STATUS, > + 
NFS_ATTR_RPC_STATUS_MAX = NUM_NFS_ATTR_RPC_STATUS - 1, > +}; > + > +enum nfs_attrs { > + NFS_ATTR_UNSPEC, > + > + NFS_ATTR_RPC_STATUS, > + > + /* add new attributes above here */ > + > + __NFS_ATTR_MAX, > + NFS_ATTR_MAX = __NFS_ATTR_MAX - 1 > +}; > + > #endif /* _UAPI_LINUX_NFS_H */ > -- > 2.41.0 >
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 33f80d289d63..4626a0002ceb 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -17,6 +17,9 @@ #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/module.h> #include <linux/fsnotify.h> +#include <net/genetlink.h> +#include <net/ip.h> +#include <net/ipv6.h> #include "idmap.h" #include "nfsd.h" @@ -1495,6 +1498,273 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; +/* the netlink family */ +static struct genl_family nfsd_genl; + +static const struct nla_policy +nfsd_rpc_status_compound_policy[NFS_ATTR_RPC_STATUS_COMPOUND_MAX + 1] = { + [NFS_ATTR_RPC_STATUS_COMPOUND_OP] = { .type = NLA_STRING }, +}; + +static const struct nla_policy +nfsd_rpc_status_policy[NFS_ATTR_RPC_STATUS_MAX + 1] = { + [NFS_ATTR_RPC_STATUS_XID] = { .type = NLA_U32 }, + [NFS_ATTR_RPC_STATUS_FLAGS] = { .type = NLA_U32 }, + [NFS_ATTR_RPC_STATUS_PC_NAME] = { .type = NLA_STRING }, + [NFS_ATTR_RPC_STATUS_VERSION] = { .type = NLA_U8 }, + [NFS_ATTR_RPC_STATUS_STIME] = { .type = NLA_S64 }, + [NFS_ATTR_RPC_STATUS_SADDR4] = { .len = sizeof_field(struct iphdr, saddr) }, + [NFS_ATTR_RPC_STATUS_DADDR4] = { .len = sizeof_field(struct iphdr, daddr) }, + [NFS_ATTR_RPC_STATUS_SADDR6] = { .len = sizeof_field(struct ipv6hdr, saddr) }, + [NFS_ATTR_RPC_STATUS_DADDR6] = { .len = sizeof_field(struct ipv6hdr, daddr) }, + [NFS_ATTR_RPC_STATUS_SPORT] = { .type = NLA_U16 }, + [NFS_ATTR_RPC_STATUS_DPORT] = { .type = NLA_U16 }, + [NFS_ATTR_RPC_STATUS_COMPOUND] = + NLA_POLICY_NESTED_ARRAY(nfsd_rpc_status_compound_policy), +}; + +static const struct nla_policy +nfsd_genl_policy[NFS_ATTR_MAX + 1] = { + [NFS_ATTR_RPC_STATUS] = NLA_POLICY_NESTED_ARRAY(nfsd_rpc_status_policy), +}; + +static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, int index, + struct nfsd_genl_rqstp *rqstp) +{ + struct nlattr *rq_attr, *comp_attr; + int i; + + rq_attr = nla_nest_start(skb, index); + if (!rq_attr) + return -ENOBUFS; + + if (nla_put_be32(skb, 
NFS_ATTR_RPC_STATUS_XID, rqstp->rq_xid) || + nla_put_u32(skb, NFS_ATTR_RPC_STATUS_FLAGS, rqstp->rq_flags) || + nla_put_string(skb, NFS_ATTR_RPC_STATUS_PC_NAME, rqstp->pc_name) || + nla_put_u8(skb, NFS_ATTR_RPC_STATUS_VERSION, rqstp->rq_vers) || + nla_put_s64(skb, NFS_ATTR_RPC_STATUS_STIME, + ktime_to_us(rqstp->rq_stime), NFS_ATTR_RPC_STATUS_PAD)) + return -ENOBUFS; + + switch (rqstp->saddr.sa_family) { + case AF_INET: { + const struct sockaddr_in *s_in, *d_in; + + s_in = (const struct sockaddr_in *)&rqstp->saddr; + d_in = (const struct sockaddr_in *)&rqstp->daddr; + if (nla_put_in_addr(skb, NFS_ATTR_RPC_STATUS_SADDR4, + s_in->sin_addr.s_addr) || + nla_put_in_addr(skb, NFS_ATTR_RPC_STATUS_DADDR4, + d_in->sin_addr.s_addr) || + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_SPORT, + s_in->sin_port) || + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_DPORT, + d_in->sin_port)) + return -ENOBUFS; + break; + } + case AF_INET6: { + const struct sockaddr_in6 *s_in, *d_in; + + s_in = (const struct sockaddr_in6 *)&rqstp->saddr; + d_in = (const struct sockaddr_in6 *)&rqstp->daddr; + if (nla_put_in6_addr(skb, NFS_ATTR_RPC_STATUS_SADDR6, + &s_in->sin6_addr) || + nla_put_in6_addr(skb, NFS_ATTR_RPC_STATUS_DADDR6, + &d_in->sin6_addr) || + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_SPORT, + s_in->sin6_port) || + nla_put_be16(skb, NFS_ATTR_RPC_STATUS_DPORT, + d_in->sin6_port)) + return -ENOBUFS; + break; + } + default: + break; + } + + comp_attr = nla_nest_start(skb, NFS_ATTR_RPC_STATUS_COMPOUND); + if (!comp_attr) + return -ENOBUFS; + + for (i = 0; i < rqstp->opcnt; i++) { + struct nlattr *op_attr; + + op_attr = nla_nest_start(skb, i); + if (!op_attr) + return -ENOBUFS; + + if (nla_put_string(skb, NFS_ATTR_RPC_STATUS_COMPOUND_OP, + nfsd4_op_name(rqstp->opnum[i]))) + return -ENOBUFS; + + nla_nest_end(skb, op_attr); + } + + nla_nest_end(skb, comp_attr); + nla_nest_end(skb, rq_attr); + + return 0; +} + +static int nfsd_genl_get_rpc_status(struct sk_buff *skb, struct genl_info *info) +{ + struct nfsd_net *nn 
= net_generic(genl_info_net(info), nfsd_net_id); + struct nlattr *rpc_attr; + int i, rqstp_index = 0; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &nfsd_genl, + 0, NFS_CMD_NEW_RPC_STATUS); + if (!hdr) { + nlmsg_free(msg); + return -ENOBUFS; + } + + rpc_attr = nla_nest_start(msg, NFS_ATTR_RPC_STATUS); + if (!rpc_attr) + goto nla_put_failure; + + rcu_read_lock(); + + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) { + struct svc_rqst *rqstp; + + list_for_each_entry_rcu(rqstp, + &nn->nfsd_serv->sv_pools[i].sp_all_threads, + rq_all) { + struct nfsd_genl_rqstp genl_rqstp; + unsigned int status_counter; + + /* + * Acquire rq_status_counter before parsing the rqst + * fields. rq_status_counter is set to an odd value in + * order to notify the consumers the rqstp fields are + * meaningful. + */ + status_counter = + smp_load_acquire(&rqstp->rq_status_counter); + if (!(status_counter & 1)) + continue; + + genl_rqstp.rq_xid = rqstp->rq_xid; + genl_rqstp.rq_flags = rqstp->rq_flags; + genl_rqstp.rq_vers = rqstp->rq_vers; + genl_rqstp.pc_name = svc_proc_name(rqstp); + genl_rqstp.rq_stime = rqstp->rq_stime; + genl_rqstp.opcnt = 0; + memcpy(&genl_rqstp.daddr, svc_daddr(rqstp), + sizeof(struct sockaddr)); + memcpy(&genl_rqstp.saddr, svc_addr(rqstp), + sizeof(struct sockaddr)); + +#ifdef CONFIG_NFSD_V4 + if (rqstp->rq_vers == NFS4_VERSION && + rqstp->rq_proc == NFSPROC4_COMPOUND) { + /* NFSv4 compund */ + struct nfsd4_compoundargs *args; + int j; + + args = rqstp->rq_argp; + genl_rqstp.opcnt = args->opcnt; + for (j = 0; j < genl_rqstp.opcnt; j++) + genl_rqstp.opnum[j] = + args->ops[j].opnum; + } +#endif /* CONFIG_NFSD_V4 */ + + /* + * Acquire rq_status_counter before reporting the rqst + * fields to the user. 
+ */ + if (smp_load_acquire(&rqstp->rq_status_counter) != + status_counter) + continue; + + if (nfsd_genl_rpc_status_compose_msg(msg, + rqstp_index++, + &genl_rqstp)) + goto nla_put_failure_rcu; + } + } + + rcu_read_unlock(); + + nla_nest_end(msg, rpc_attr); + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); + +nla_put_failure_rcu: + rcu_read_unlock(); +nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + + return -EMSGSIZE; +} + +static int nfsd_genl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + struct nfsd_net *nn = net_generic(genl_info_net(info), nfsd_net_id); + + if (ops->internal_flags & NFSD_FLAG_NEED_REF_COUNT) { + int ret = -ENODEV; + + mutex_lock(&nfsd_mutex); + if (nn->nfsd_serv) { + svc_get(nn->nfsd_serv); + ret = 0; + } + mutex_unlock(&nfsd_mutex); + + return ret; + } + + return 0; +} + +static void nfsd_genl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + if (ops->internal_flags & NFSD_FLAG_NEED_REF_COUNT) { + mutex_lock(&nfsd_mutex); + nfsd_put(genl_info_net(info)); + mutex_unlock(&nfsd_mutex); + } +} + +static struct genl_small_ops nfsd_genl_ops[] = { + { + .cmd = NFS_CMD_GET_RPC_STATUS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = nfsd_genl_get_rpc_status, + .internal_flags = NFSD_FLAG_NEED_REF_COUNT, + }, +}; + +static struct genl_family nfsd_genl __ro_after_init = { + .name = "nfsd_server", + .version = 1, + .maxattr = NFS_ATTR_MAX, + .module = THIS_MODULE, + .netnsok = true, + .parallel_ops = true, + .hdrsize = 0, + .pre_doit = nfsd_genl_pre_doit, + .post_doit = nfsd_genl_post_doit, + .policy = nfsd_genl_policy, + .small_ops = nfsd_genl_ops, + .n_small_ops = ARRAY_SIZE(nfsd_genl_ops), + .resv_start_op = NFS_CMD_NEW_RPC_STATUS + 1, +}; + /** * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace * @net: a freshly-created network namespace @@ -1589,6 +1859,10 @@ static int __init 
init_nfsd(void) retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; + retval = genl_register_family(&nfsd_genl); + if (retval) + goto out_free_all; + return 0; out_free_all: nfsd4_destroy_laundry_wq(); @@ -1613,6 +1887,7 @@ static int __init init_nfsd(void) static void __exit exit_nfsd(void) { + genl_unregister_family(&nfsd_genl); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index e95c3322eb9b..749c871b3291 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -62,6 +62,25 @@ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; +enum nfsd_genl_internal_flag { + NFSD_FLAG_NEED_REF_COUNT = BIT(0), +}; + +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 50 + +struct nfsd_genl_rqstp { + struct sockaddr daddr; + struct sockaddr saddr; + unsigned long rq_flags; + const char *pc_name; + ktime_t rq_stime; + __be32 rq_xid; + u32 rq_vers; + /* NFSv4 compund */ + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND]; + u16 opcnt; +}; extern struct svc_program nfsd_program; extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1582af33e204..fad34a7325b3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; + /* + * Release rq_status_counter setting it to an odd value after the rpc + * request has been properly parsed. rq_status_counter is used to + * notify the consumers if the rqstp fields are stable + * (rq_status_counter is odd) or not meaningful (rq_status_counter + * is even). 
+ */ + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1); + rp = NULL; switch (nfsd_cache_lookup(rqstp, &rp)) { case RC_DOIT: @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; + /* + * Release rq_status_counter setting it to an even value after the rpc + * request has been properly processed. + */ + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); + nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1); out_cached_reply: return 1; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index cbddcf484dba..41bdc913fa71 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -174,8 +174,6 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 -/* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 50 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dbf5b21feafe..caa20defd255 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -251,6 +251,7 @@ struct svc_rqst { * net namespace */ void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned int rq_status_counter; /* RPC processing counter */ }; /* bits for rq_flags */ diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h index 946cb62d64b0..86a5daaaf9d9 100644 --- a/include/uapi/linux/nfs.h +++ b/include/uapi/linux/nfs.h @@ -132,4 +132,58 @@ enum nfs_ftype { NFFIFO = 8 }; +enum nfs_commands { + NFS_CMD_UNSPEC, + + NFS_CMD_GET_RPC_STATUS, + NFS_CMD_NEW_RPC_STATUS, + + /* add new commands above here */ + + __NFS_CMD_MAX, + NFS_CMD_MAX = __NFS_CMD_MAX - 1, +}; + +enum nfs_rcp_status_compound_attrs { + 
__NFS_ATTR_RPC_STATUS_COMPOUND_UNSPEC, + NFS_ATTR_RPC_STATUS_COMPOUND_OP, + + /* keep it last */ + NUM_NFS_ATTR_RPC_STATUS_COMPOUND, + NFS_ATTR_RPC_STATUS_COMPOUND_MAX = NUM_NFS_ATTR_RPC_STATUS_COMPOUND - 1, +}; + +enum nfs_rpc_status_attrs { + __NFS_ATTR_RPC_STATUS_UNSPEC, + + NFS_ATTR_RPC_STATUS_XID, + NFS_ATTR_RPC_STATUS_FLAGS, + NFS_ATTR_RPC_STATUS_PC_NAME, + NFS_ATTR_RPC_STATUS_VERSION, + NFS_ATTR_RPC_STATUS_STIME, + NFS_ATTR_RPC_STATUS_SADDR4, + NFS_ATTR_RPC_STATUS_DADDR4, + NFS_ATTR_RPC_STATUS_SADDR6, + NFS_ATTR_RPC_STATUS_DADDR6, + NFS_ATTR_RPC_STATUS_SPORT, + NFS_ATTR_RPC_STATUS_DPORT, + NFS_ATTR_RPC_STATUS_PAD, + NFS_ATTR_RPC_STATUS_COMPOUND, + + /* keep it last */ + NUM_NFS_ATTR_RPC_STATUS, + NFS_ATTR_RPC_STATUS_MAX = NUM_NFS_ATTR_RPC_STATUS - 1, +}; + +enum nfs_attrs { + NFS_ATTR_UNSPEC, + + NFS_ATTR_RPC_STATUS, + + /* add new attributes above here */ + + __NFS_ATTR_MAX, + NFS_ATTR_MAX = __NFS_ATTR_MAX - 1 +}; + #endif /* _UAPI_LINUX_NFS_H */
Introduce rpc_status netlink support for NFSD in order to dump pending RPC requests debugging information from userspace. Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> --- fs/nfsd/nfsctl.c | 275 +++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsd.h | 19 +++ fs/nfsd/nfssvc.c | 15 ++ fs/nfsd/state.h | 2 - include/linux/sunrpc/svc.h | 1 + include/uapi/linux/nfs.h | 54 ++++++++ 6 files changed, 364 insertions(+), 2 deletions(-)