diff mbox series

[v2,26/27] NFSD: Add tracepoints in the NFS dispatcher

Message ID 160071198717.1468.14262284967190973528.stgit@klimt.1015granger.net (mailing list archive)
State New, archived
Headers show
Series NFSD operation monitoring tracepoints | expand

Commit Message

Chuck Lever Sept. 21, 2020, 6:13 p.m. UTC
This is follow-on work to the tracepoints added in the NFS server's
duplicate reply cache. Here, tracepoints are introduced that report
replies from cache as well as encoding and decoding errors.

The NFSv2, v3, and v4 dispatcher requirements have diverged over
time, leaving us with a little bit of technical debt. In addition,
I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
conditional branches from this hot path helps optimize CPU
utilization. So, I've duplicated the nfsd_dispatcher function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs2acl.c  |    2 -
 fs/nfsd/nfs3acl.c  |    2 -
 fs/nfsd/nfs4proc.c |    2 -
 fs/nfsd/nfsd.h     |    1 
 fs/nfsd/nfssvc.c   |  198 ++++++++++++++++++++++++++++++++++------------------
 fs/nfsd/trace.h    |   50 +++++++++++++
 6 files changed, 183 insertions(+), 72 deletions(-)

Comments

J. Bruce Fields Sept. 24, 2020, 11:45 p.m. UTC | #1
On Mon, Sep 21, 2020 at 02:13:07PM -0400, Chuck Lever wrote:
> This is follow-on work to the tracepoints added in the NFS server's
> duplicate reply cache. Here, tracepoints are introduced that report
> replies from cache as well as encoding and decoding errors.
> 
> The NFSv2, v3, and v4 dispatcher requirements have diverged over
> time, leaving us with a little bit of technical debt. In addition,
> I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
> nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
> conditional branches from this hot path helps optimize CPU
> utilization. So, I've duplicated the nfsd_dispatcher function.

Comparing current nfsd_dispatch to the nfsv2/v3 nfsd_dispatch: the only
thing I spotted removed from the v2/v3-specific dispatch is the
rq_lease_breaker = NULL.  (I think that's not correct, actually.  We
could remove the need for that to be set in the v2/v3 case, but with the
current code it does need to be set.)

Comparing current nfsd_dispatch to the nfsv4 nfsd4_dispatch, the
v4-specific dispatch does away with nfs_request_too_big() and the
v2-specific shortcut in the error encoding case.

So these still look *very* similar.  I don't feel like we're getting a
lot of benefit out of splitting these out.

By the way, the combination of copying a bunch of code, doing some
common cleanup, and dropping version-specific stuff makes it a little
hard to sort out what's going on.  If it were me, I'd do this as:

	- one patch to do common cleanup (dropping some redundant
	  comments, using argv/resv variables to cleanup calculations,
	  etc.)
	- a second patch that just duplicates the one nfsd_dispatch into
	  nfsd_dispatch and nfsd4_dispatch
	- a third patch that just removes the stuff we don't need from
	  the newly duplicated dispatchers.

and then it'd be obvious what's changed.

--b.

> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs2acl.c  |    2 -
>  fs/nfsd/nfs3acl.c  |    2 -
>  fs/nfsd/nfs4proc.c |    2 -
>  fs/nfsd/nfsd.h     |    1 
>  fs/nfsd/nfssvc.c   |  198 ++++++++++++++++++++++++++++++++++------------------
>  fs/nfsd/trace.h    |   50 +++++++++++++
>  6 files changed, 183 insertions(+), 72 deletions(-)
> 
> diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
> index 54e597918822..894b8f0594e2 100644
> --- a/fs/nfsd/nfs2acl.c
> +++ b/fs/nfsd/nfs2acl.c
> @@ -416,6 +416,6 @@ const struct svc_version nfsd_acl_version2 = {
>  	.vs_nproc	= 5,
>  	.vs_proc	= nfsd_acl_procedures2,
>  	.vs_count	= nfsd_acl_count2,
> -	.vs_dispatch	= nfsd_dispatch,
> +	.vs_dispatch	= nfsd4_dispatch,
>  	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
>  };
> diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
> index 7f512dec7460..924ebb19c59c 100644
> --- a/fs/nfsd/nfs3acl.c
> +++ b/fs/nfsd/nfs3acl.c
> @@ -282,7 +282,7 @@ const struct svc_version nfsd_acl_version3 = {
>  	.vs_nproc	= 3,
>  	.vs_proc	= nfsd_acl_procedures3,
>  	.vs_count	= nfsd_acl_count3,
> -	.vs_dispatch	= nfsd_dispatch,
> +	.vs_dispatch	= nfsd4_dispatch,
>  	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
>  };
>  
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 49109645af24..61302641f651 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -3320,7 +3320,7 @@ const struct svc_version nfsd_version4 = {
>  	.vs_nproc		= 2,
>  	.vs_proc		= nfsd_procedures4,
>  	.vs_count		= nfsd_count3,
> -	.vs_dispatch		= nfsd_dispatch,
> +	.vs_dispatch		= nfsd4_dispatch,
>  	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
>  	.vs_rpcb_optnl		= true,
>  	.vs_need_cong_ctrl	= true,
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index cb742e17e04a..7fa4b19dd2f7 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -78,6 +78,7 @@ extern const struct seq_operations nfs_exports_op;
>   */
>  int		nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
>  int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
> +int		nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp);
>  
>  int		nfsd_nrthreads(struct net *);
>  int		nfsd_nrpools(struct net *);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index f7f6473578af..d626eea1c78a 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -28,6 +28,7 @@
>  #include "vfs.h"
>  #include "netns.h"
>  #include "filecache.h"
> +#include "trace.h"
>  
>  #define NFSDDBG_FACILITY	NFSDDBG_SVC
>  
> @@ -964,7 +965,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
>  {
>  	if (nfserr == nfserr_jukebox && vers == 2)
>  		return nfserr_dropit;
> -	if (nfserr == nfserr_wrongsec && vers < 4)
> +	if (nfserr == nfserr_wrongsec)
>  		return nfserr_acces;
>  	return nfserr;
>  }
> @@ -980,18 +981,6 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
>  static bool nfs_request_too_big(struct svc_rqst *rqstp,
>  				const struct svc_procedure *proc)
>  {
> -	/*
> -	 * The ACL code has more careful bounds-checking and is not
> -	 * susceptible to this problem:
> -	 */
> -	if (rqstp->rq_prog != NFS_PROGRAM)
> -		return false;
> -	/*
> -	 * Ditto NFSv4 (which can in theory have argument and reply both
> -	 * more than a page):
> -	 */
> -	if (rqstp->rq_vers >= 4)
> -		return false;
>  	/* The reply will be small, we're OK: */
>  	if (proc->pc_xdrressize > 0 &&
>  	    proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
> @@ -1000,81 +989,152 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
>  	return rqstp->rq_arg.len > PAGE_SIZE;
>  }
>  
> -int
> -nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
> +/**
> + * nfsd_dispatch - Process an NFSv2 or NFSv3 request
> + * @rqstp: incoming NFS request
> + * @statp: OUT: RPC accept_stat value
> + *
> + * Return values:
> + *  %0: Processing complete; do not send a Reply
> + *  %1: Processing complete; send a Reply
> + */
> +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>  {
> -	const struct svc_procedure *proc;
> -	__be32			nfserr;
> -	__be32			*nfserrp;
> -
> -	dprintk("nfsd_dispatch: vers %d proc %d\n",
> -				rqstp->rq_vers, rqstp->rq_proc);
> -	proc = rqstp->rq_procinfo;
> -
> -	if (nfs_request_too_big(rqstp, proc)) {
> -		dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
> -		*statp = rpc_garbage_args;
> -		return 1;
> +	const struct svc_procedure *proc = rqstp->rq_procinfo;
> +	struct kvec *argv = &rqstp->rq_arg.head[0];
> +	struct kvec *resv = &rqstp->rq_res.head[0];
> +	__be32 nfserr, *nfserrp;
> +
> +	if (nfs_request_too_big(rqstp, proc))
> +		goto out_too_large;
> +
> +	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
> +		goto out_decode_err;
> +
> +	rqstp->rq_cachetype = proc->pc_cachetype;
> +	switch (nfsd_cache_lookup(rqstp)) {
> +	case RC_DROPIT:
> +		goto out_dropit;
> +	case RC_REPLY:
> +		goto out_cached_reply;
> +	case RC_DOIT:
> +		break;
>  	}
> -	rqstp->rq_lease_breaker = NULL;
> +
> +	nfserrp = resv->iov_base + resv->iov_len;
> +	resv->iov_len += sizeof(__be32);
> +	nfserr = proc->pc_func(rqstp);
> +	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
> +	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags))
> +		goto out_update_drop;
> +	if (rqstp->rq_proc)
> +		*nfserrp++ = nfserr;
> +
> +	/* For NFSv2, additional info is never returned in case of an error. */
> +	if (!(nfserr && rqstp->rq_vers == 2))
> +		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
> +			goto out_encode_err;
> +
> +	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
> +	trace_nfsd_svc_status(rqstp, proc, nfserr);
> +	return 1;
> +
> +out_too_large:
> +	trace_nfsd_svc_too_large_err(rqstp);
> +	*statp = rpc_garbage_args;
> +	return 1;
> +
> +out_decode_err:
> +	trace_nfsd_svc_decode_err(rqstp);
> +	*statp = rpc_garbage_args;
> +	return 1;
> +
> +out_update_drop:
> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
> +out_dropit:
> +	trace_nfsd_svc_dropit(rqstp);
> +	return 0;
> +
> +out_cached_reply:
> +	trace_nfsd_svc_cached_reply(rqstp);
> +	return 1;
> +
> +out_encode_err:
> +	trace_nfsd_svc_encode_err(rqstp);
> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
> +	*statp = rpc_system_err;
> +	return 1;
> +}
> +
> +/**
> + * nfsd4_dispatch - Process an NFSv4 or NFSACL request
> + * @rqstp: incoming NFS request
> + * @statp: OUT: RPC accept_stat value
> + *
> + * Return values:
> + *  %0: Processing complete; do not send a Reply
> + *  %1: Processing complete; send a Reply
> + */
> +int nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp)
> +{
> +	const struct svc_procedure *proc = rqstp->rq_procinfo;
> +	struct kvec *argv = &rqstp->rq_arg.head[0];
> +	struct kvec *resv = &rqstp->rq_res.head[0];
> +	__be32 nfserr, *nfserrp;
> +
>  	/*
>  	 * Give the xdr decoder a chance to change this if it wants
>  	 * (necessary in the NFSv4.0 compound case)
>  	 */
>  	rqstp->rq_cachetype = proc->pc_cachetype;
> -	/* Decode arguments */
> -	if (proc->pc_decode &&
> -	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
> -		dprintk("nfsd: failed to decode arguments!\n");
> -		*statp = rpc_garbage_args;
> -		return 1;
> -	}
> +	rqstp->rq_lease_breaker = NULL;
> +
> +	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
> +		goto out_decode_err;
>  
> -	/* Check whether we have this call in the cache. */
>  	switch (nfsd_cache_lookup(rqstp)) {
>  	case RC_DROPIT:
> -		return 0;
> +		goto out_dropit;
>  	case RC_REPLY:
> -		return 1;
> -	case RC_DOIT:;
> -		/* do it */
> +		goto out_cached_reply;
> +	case RC_DOIT:
> +		break;
>  	}
>  
> -	/* need to grab the location to store the status, as
> -	 * nfsv4 does some encoding while processing 
> -	 */
> -	nfserrp = rqstp->rq_res.head[0].iov_base
> -		+ rqstp->rq_res.head[0].iov_len;
> -	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
> -
> -	/* Now call the procedure handler, and encode NFS status. */
> +	nfserrp = resv->iov_base + resv->iov_len;
> +	resv->iov_len += sizeof(__be32);
>  	nfserr = proc->pc_func(rqstp);
> -	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
> -	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
> -		dprintk("nfsd: Dropping request; may be revisited later\n");
> -		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
> -		return 0;
> -	}
> -
> -	if (rqstp->rq_proc != 0)
> +	if (test_bit(RQ_DROPME, &rqstp->rq_flags))
> +		goto out_update_drop;
> +	if (rqstp->rq_proc)
>  		*nfserrp++ = nfserr;
>  
> -	/* Encode result.
> -	 * For NFSv2, additional info is never returned in case of an error.
> -	 */
> -	if (!(nfserr && rqstp->rq_vers == 2)) {
> -		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
> -			/* Failed to encode result. Release cache entry */
> -			dprintk("nfsd: failed to encode result!\n");
> -			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
> -			*statp = rpc_system_err;
> -			return 1;
> -		}
> -	}
> +	if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
> +		goto out_encode_err;
>  
> -	/* Store reply in cache. */
>  	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
>  	return 1;
> +
> +out_decode_err:
> +	trace_nfsd_svc_decode_err(rqstp);
> +	*statp = rpc_garbage_args;
> +	return 1;
> +
> +out_update_drop:
> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
> +out_dropit:
> +	trace_nfsd_svc_dropit(rqstp);
> +	return 0;
> +
> +out_cached_reply:
> +	trace_nfsd_svc_cached_reply(rqstp);
> +	return 1;
> +
> +out_encode_err:
> +	trace_nfsd_svc_encode_err(rqstp);
> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
> +	*statp = rpc_system_err;
> +	return 1;
>  }
>  
>  int nfsd_pool_stats_open(struct inode *inode, struct file *file)
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 53115fbc00b2..50ab4a84c25f 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -32,6 +32,56 @@
>  		{ NFSD_MAY_READ_IF_EXEC,	"READ_IF_EXEC" },	\
>  		{ NFSD_MAY_64BIT_COOKIE,	"64BIT_COOKIE" })
>  
> +DECLARE_EVENT_CLASS(nfsd_simple_class,
> +	TP_PROTO(
> +		const struct svc_rqst *rqstp
> +	),
> +	TP_ARGS(rqstp),
> +	TP_STRUCT__entry(
> +		__field(u32, xid)
> +	),
> +	TP_fast_assign(
> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
> +	),
> +	TP_printk("xid=0x%08x", __entry->xid)
> +);
> +
> +#define DEFINE_NFSD_SIMPLE_EVENT(name)			\
> +DEFINE_EVENT(nfsd_simple_class, nfsd_##name,		\
> +	TP_PROTO(const struct svc_rqst *rqstp),		\
> +	TP_ARGS(rqstp))
> +
> +DEFINE_NFSD_SIMPLE_EVENT(svc_too_large_err);
> +DEFINE_NFSD_SIMPLE_EVENT(svc_decode_err);
> +DEFINE_NFSD_SIMPLE_EVENT(svc_dropit);
> +DEFINE_NFSD_SIMPLE_EVENT(svc_cached_reply);
> +DEFINE_NFSD_SIMPLE_EVENT(svc_encode_err);
> +
> +TRACE_EVENT(nfsd_svc_status,
> +	TP_PROTO(
> +		const struct svc_rqst *rqstp,
> +		const struct svc_procedure *proc,
> +		__be32 status
> +	),
> +	TP_ARGS(rqstp, proc, status),
> +	TP_STRUCT__entry(
> +		__field(u32, xid)
> +		__field(u32, version)
> +		__field(unsigned long, status)
> +		__string(procedure, rqstp->rq_procinfo->pc_name)
> +	),
> +	TP_fast_assign(
> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
> +		__entry->version = rqstp->rq_vers;
> +		__entry->status = be32_to_cpu(status);
> +		__assign_str(procedure, rqstp->rq_procinfo->pc_name);
> +	),
> +	TP_printk("xid=0x%08x version=%u procedure=%s status=%s",
> +		__entry->xid, __entry->version, __get_str(procedure),
> +		show_nfs_status(__entry->status)
> +	)
> +);
> +
>  TRACE_EVENT(nfsd_access,
>  	TP_PROTO(
>  		const struct svc_rqst *rqstp,
>
Chuck Lever Sept. 25, 2020, 1:59 p.m. UTC | #2
> On Sep 24, 2020, at 7:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Mon, Sep 21, 2020 at 02:13:07PM -0400, Chuck Lever wrote:
>> This is follow-on work to the tracepoints added in the NFS server's
>> duplicate reply cache. Here, tracepoints are introduced that report
>> replies from cache as well as encoding and decoding errors.
>> 
>> The NFSv2, v3, and v4 dispatcher requirements have diverged over
>> time, leaving us with a little bit of technical debt. In addition,
>> I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
>> nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
>> conditional branches from this hot path helps optimize CPU
>> utilization. So, I've duplicated the nfsd_dispatcher function.
> 
> Comparing current nfsd_dispatch to the nfsv2/v3 nfsd_dispatch: the only
> thing I spotted removed from the v2/v3-specific dispatch is the
> rq_lease_breaker = NULL.  (I think that's not correct, actually.  We
> could remove the need for that to be set in the v2/v3 case, but with the
> current code it does need to be set.)

Noted with thanks.


> Comparing current nfsd_dispatch to the nfsv4 nfsd4_dispatch, the
> v4-specific dispatch does away with nfs_request_too_big() and the
> v2-specific shortcut in the error encoding case.
> 
> So these still look *very* similar.  I don't feel like we're getting a
> lot of benefit out of splitting these out.

I don't disagree with that at all. At this point I'm just noodling
to see what's possible. I'm now toying with other ways to add high-
value tracing in the legacy ULPs. In the end I might end up avoiding
significant changes in the dispatchers in order to add tracing.

However, a few thoughts I had while learning how the dispatcher
code works.

There are some opportunities for reducing instruction path length
and the number of conditional branches in here. It's a hot path,
so I think we should consider some careful micro-optimizations
even if they don't add significant new features or do add some
code duplication.

In user space, the library (iirc) assumes each ULP provides it's
own dispatcher function. I'd consider duplicating and removing
svc_generic_dispatcher() to simplify the pasta in svc_process(),
again as a micro-optimization and for better code legibility.

lockd's pc_func returns an RPC accept_stat, but the NFSD pc_func
methods return an NFS status. The latter feels like a layering
violation for the sake of reducing a small amount of code
duplication. I'd rather see encoding of the NFS status handled in
the NFS Reply encoders, since that is an XDR function, and because
that logic seems slightly different for NFSv2, support for which
we'd like to deprecate at some point.

Note also that *statp in nfsd_dispatch is never explicitly set to
rpc_success in the normal execution flow. It relies on the
equivalence of rpc_success and nfs_ok, which is convenient, but
confusing to read. It might be cleaner if *statp was made an enum
to make it explicit what set of values go in that return variable.


> By the way, the combination of copying a bunch of code, doing some
> common cleanup, and dropping version-specific stuff makes it a little
> hard to sort out what's going on.  If it were me, I'd do this as:
> 
> 	- one patch to do common cleanup (dropping some redundant
> 	  comments, using argv/resv variables to cleanup calculations,
> 	  etc.)
> 	- a second patch that just duplicates the one nfsd_dispatch into
> 	  nfsd_dispatch and nfsd4_dispatch
> 	- a third patch that just removes the stuff we don't need from
> 	  the newly duplicated dispatchers.
> 
> and then it'd be obvious what's changed.

Good points. The series is still immature, and I tend to maintain
larger checkpoint patches that get broken down over time to make
it more obvious what is changing and why. I'll keep your comments
in mind as this work evolves. Feel free to make similar suggestions
about my future work.


> --b.
> 
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/nfs2acl.c  |    2 -
>> fs/nfsd/nfs3acl.c  |    2 -
>> fs/nfsd/nfs4proc.c |    2 -
>> fs/nfsd/nfsd.h     |    1 
>> fs/nfsd/nfssvc.c   |  198 ++++++++++++++++++++++++++++++++++------------------
>> fs/nfsd/trace.h    |   50 +++++++++++++
>> 6 files changed, 183 insertions(+), 72 deletions(-)
>> 
>> diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
>> index 54e597918822..894b8f0594e2 100644
>> --- a/fs/nfsd/nfs2acl.c
>> +++ b/fs/nfsd/nfs2acl.c
>> @@ -416,6 +416,6 @@ const struct svc_version nfsd_acl_version2 = {
>> 	.vs_nproc	= 5,
>> 	.vs_proc	= nfsd_acl_procedures2,
>> 	.vs_count	= nfsd_acl_count2,
>> -	.vs_dispatch	= nfsd_dispatch,
>> +	.vs_dispatch	= nfsd4_dispatch,
>> 	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
>> };
>> diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
>> index 7f512dec7460..924ebb19c59c 100644
>> --- a/fs/nfsd/nfs3acl.c
>> +++ b/fs/nfsd/nfs3acl.c
>> @@ -282,7 +282,7 @@ const struct svc_version nfsd_acl_version3 = {
>> 	.vs_nproc	= 3,
>> 	.vs_proc	= nfsd_acl_procedures3,
>> 	.vs_count	= nfsd_acl_count3,
>> -	.vs_dispatch	= nfsd_dispatch,
>> +	.vs_dispatch	= nfsd4_dispatch,
>> 	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
>> };
>> 
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 49109645af24..61302641f651 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -3320,7 +3320,7 @@ const struct svc_version nfsd_version4 = {
>> 	.vs_nproc		= 2,
>> 	.vs_proc		= nfsd_procedures4,
>> 	.vs_count		= nfsd_count3,
>> -	.vs_dispatch		= nfsd_dispatch,
>> +	.vs_dispatch		= nfsd4_dispatch,
>> 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
>> 	.vs_rpcb_optnl		= true,
>> 	.vs_need_cong_ctrl	= true,
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index cb742e17e04a..7fa4b19dd2f7 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -78,6 +78,7 @@ extern const struct seq_operations nfs_exports_op;
>>  */
>> int		nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
>> int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
>> +int		nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp);
>> 
>> int		nfsd_nrthreads(struct net *);
>> int		nfsd_nrpools(struct net *);
>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>> index f7f6473578af..d626eea1c78a 100644
>> --- a/fs/nfsd/nfssvc.c
>> +++ b/fs/nfsd/nfssvc.c
>> @@ -28,6 +28,7 @@
>> #include "vfs.h"
>> #include "netns.h"
>> #include "filecache.h"
>> +#include "trace.h"
>> 
>> #define NFSDDBG_FACILITY	NFSDDBG_SVC
>> 
>> @@ -964,7 +965,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
>> {
>> 	if (nfserr == nfserr_jukebox && vers == 2)
>> 		return nfserr_dropit;
>> -	if (nfserr == nfserr_wrongsec && vers < 4)
>> +	if (nfserr == nfserr_wrongsec)
>> 		return nfserr_acces;
>> 	return nfserr;
>> }
>> @@ -980,18 +981,6 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
>> static bool nfs_request_too_big(struct svc_rqst *rqstp,
>> 				const struct svc_procedure *proc)
>> {
>> -	/*
>> -	 * The ACL code has more careful bounds-checking and is not
>> -	 * susceptible to this problem:
>> -	 */
>> -	if (rqstp->rq_prog != NFS_PROGRAM)
>> -		return false;
>> -	/*
>> -	 * Ditto NFSv4 (which can in theory have argument and reply both
>> -	 * more than a page):
>> -	 */
>> -	if (rqstp->rq_vers >= 4)
>> -		return false;
>> 	/* The reply will be small, we're OK: */
>> 	if (proc->pc_xdrressize > 0 &&
>> 	    proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
>> @@ -1000,81 +989,152 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
>> 	return rqstp->rq_arg.len > PAGE_SIZE;
>> }
>> 
>> -int
>> -nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>> +/**
>> + * nfsd_dispatch - Process an NFSv2 or NFSv3 request
>> + * @rqstp: incoming NFS request
>> + * @statp: OUT: RPC accept_stat value
>> + *
>> + * Return values:
>> + *  %0: Processing complete; do not send a Reply
>> + *  %1: Processing complete; send a Reply
>> + */
>> +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>> {
>> -	const struct svc_procedure *proc;
>> -	__be32			nfserr;
>> -	__be32			*nfserrp;
>> -
>> -	dprintk("nfsd_dispatch: vers %d proc %d\n",
>> -				rqstp->rq_vers, rqstp->rq_proc);
>> -	proc = rqstp->rq_procinfo;
>> -
>> -	if (nfs_request_too_big(rqstp, proc)) {
>> -		dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
>> -		*statp = rpc_garbage_args;
>> -		return 1;
>> +	const struct svc_procedure *proc = rqstp->rq_procinfo;
>> +	struct kvec *argv = &rqstp->rq_arg.head[0];
>> +	struct kvec *resv = &rqstp->rq_res.head[0];
>> +	__be32 nfserr, *nfserrp;
>> +
>> +	if (nfs_request_too_big(rqstp, proc))
>> +		goto out_too_large;
>> +
>> +	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
>> +		goto out_decode_err;
>> +
>> +	rqstp->rq_cachetype = proc->pc_cachetype;
>> +	switch (nfsd_cache_lookup(rqstp)) {
>> +	case RC_DROPIT:
>> +		goto out_dropit;
>> +	case RC_REPLY:
>> +		goto out_cached_reply;
>> +	case RC_DOIT:
>> +		break;
>> 	}
>> -	rqstp->rq_lease_breaker = NULL;
>> +
>> +	nfserrp = resv->iov_base + resv->iov_len;
>> +	resv->iov_len += sizeof(__be32);
>> +	nfserr = proc->pc_func(rqstp);
>> +	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
>> +	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags))
>> +		goto out_update_drop;
>> +	if (rqstp->rq_proc)
>> +		*nfserrp++ = nfserr;
>> +
>> +	/* For NFSv2, additional info is never returned in case of an error. */
>> +	if (!(nfserr && rqstp->rq_vers == 2))
>> +		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
>> +			goto out_encode_err;
>> +
>> +	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
>> +	trace_nfsd_svc_status(rqstp, proc, nfserr);
>> +	return 1;
>> +
>> +out_too_large:
>> +	trace_nfsd_svc_too_large_err(rqstp);
>> +	*statp = rpc_garbage_args;
>> +	return 1;
>> +
>> +out_decode_err:
>> +	trace_nfsd_svc_decode_err(rqstp);
>> +	*statp = rpc_garbage_args;
>> +	return 1;
>> +
>> +out_update_drop:
>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>> +out_dropit:
>> +	trace_nfsd_svc_dropit(rqstp);
>> +	return 0;
>> +
>> +out_cached_reply:
>> +	trace_nfsd_svc_cached_reply(rqstp);
>> +	return 1;
>> +
>> +out_encode_err:
>> +	trace_nfsd_svc_encode_err(rqstp);
>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>> +	*statp = rpc_system_err;
>> +	return 1;
>> +}
>> +
>> +/**
>> + * nfsd4_dispatch - Process an NFSv4 or NFSACL request
>> + * @rqstp: incoming NFS request
>> + * @statp: OUT: RPC accept_stat value
>> + *
>> + * Return values:
>> + *  %0: Processing complete; do not send a Reply
>> + *  %1: Processing complete; send a Reply
>> + */
>> +int nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>> +{
>> +	const struct svc_procedure *proc = rqstp->rq_procinfo;
>> +	struct kvec *argv = &rqstp->rq_arg.head[0];
>> +	struct kvec *resv = &rqstp->rq_res.head[0];
>> +	__be32 nfserr, *nfserrp;
>> +
>> 	/*
>> 	 * Give the xdr decoder a chance to change this if it wants
>> 	 * (necessary in the NFSv4.0 compound case)
>> 	 */
>> 	rqstp->rq_cachetype = proc->pc_cachetype;
>> -	/* Decode arguments */
>> -	if (proc->pc_decode &&
>> -	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
>> -		dprintk("nfsd: failed to decode arguments!\n");
>> -		*statp = rpc_garbage_args;
>> -		return 1;
>> -	}
>> +	rqstp->rq_lease_breaker = NULL;
>> +
>> +	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
>> +		goto out_decode_err;
>> 
>> -	/* Check whether we have this call in the cache. */
>> 	switch (nfsd_cache_lookup(rqstp)) {
>> 	case RC_DROPIT:
>> -		return 0;
>> +		goto out_dropit;
>> 	case RC_REPLY:
>> -		return 1;
>> -	case RC_DOIT:;
>> -		/* do it */
>> +		goto out_cached_reply;
>> +	case RC_DOIT:
>> +		break;
>> 	}
>> 
>> -	/* need to grab the location to store the status, as
>> -	 * nfsv4 does some encoding while processing 
>> -	 */
>> -	nfserrp = rqstp->rq_res.head[0].iov_base
>> -		+ rqstp->rq_res.head[0].iov_len;
>> -	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
>> -
>> -	/* Now call the procedure handler, and encode NFS status. */
>> +	nfserrp = resv->iov_base + resv->iov_len;
>> +	resv->iov_len += sizeof(__be32);
>> 	nfserr = proc->pc_func(rqstp);
>> -	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
>> -	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
>> -		dprintk("nfsd: Dropping request; may be revisited later\n");
>> -		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>> -		return 0;
>> -	}
>> -
>> -	if (rqstp->rq_proc != 0)
>> +	if (test_bit(RQ_DROPME, &rqstp->rq_flags))
>> +		goto out_update_drop;
>> +	if (rqstp->rq_proc)
>> 		*nfserrp++ = nfserr;
>> 
>> -	/* Encode result.
>> -	 * For NFSv2, additional info is never returned in case of an error.
>> -	 */
>> -	if (!(nfserr && rqstp->rq_vers == 2)) {
>> -		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
>> -			/* Failed to encode result. Release cache entry */
>> -			dprintk("nfsd: failed to encode result!\n");
>> -			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>> -			*statp = rpc_system_err;
>> -			return 1;
>> -		}
>> -	}
>> +	if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
>> +		goto out_encode_err;
>> 
>> -	/* Store reply in cache. */
>> 	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
>> 	return 1;
>> +
>> +out_decode_err:
>> +	trace_nfsd_svc_decode_err(rqstp);
>> +	*statp = rpc_garbage_args;
>> +	return 1;
>> +
>> +out_update_drop:
>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>> +out_dropit:
>> +	trace_nfsd_svc_dropit(rqstp);
>> +	return 0;
>> +
>> +out_cached_reply:
>> +	trace_nfsd_svc_cached_reply(rqstp);
>> +	return 1;
>> +
>> +out_encode_err:
>> +	trace_nfsd_svc_encode_err(rqstp);
>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>> +	*statp = rpc_system_err;
>> +	return 1;
>> }
>> 
>> int nfsd_pool_stats_open(struct inode *inode, struct file *file)
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 53115fbc00b2..50ab4a84c25f 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -32,6 +32,56 @@
>> 		{ NFSD_MAY_READ_IF_EXEC,	"READ_IF_EXEC" },	\
>> 		{ NFSD_MAY_64BIT_COOKIE,	"64BIT_COOKIE" })
>> 
>> +DECLARE_EVENT_CLASS(nfsd_simple_class,
>> +	TP_PROTO(
>> +		const struct svc_rqst *rqstp
>> +	),
>> +	TP_ARGS(rqstp),
>> +	TP_STRUCT__entry(
>> +		__field(u32, xid)
>> +	),
>> +	TP_fast_assign(
>> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
>> +	),
>> +	TP_printk("xid=0x%08x", __entry->xid)
>> +);
>> +
>> +#define DEFINE_NFSD_SIMPLE_EVENT(name)			\
>> +DEFINE_EVENT(nfsd_simple_class, nfsd_##name,		\
>> +	TP_PROTO(const struct svc_rqst *rqstp),		\
>> +	TP_ARGS(rqstp))
>> +
>> +DEFINE_NFSD_SIMPLE_EVENT(svc_too_large_err);
>> +DEFINE_NFSD_SIMPLE_EVENT(svc_decode_err);
>> +DEFINE_NFSD_SIMPLE_EVENT(svc_dropit);
>> +DEFINE_NFSD_SIMPLE_EVENT(svc_cached_reply);
>> +DEFINE_NFSD_SIMPLE_EVENT(svc_encode_err);
>> +
>> +TRACE_EVENT(nfsd_svc_status,
>> +	TP_PROTO(
>> +		const struct svc_rqst *rqstp,
>> +		const struct svc_procedure *proc,
>> +		__be32 status
>> +	),
>> +	TP_ARGS(rqstp, proc, status),
>> +	TP_STRUCT__entry(
>> +		__field(u32, xid)
>> +		__field(u32, version)
>> +		__field(unsigned long, status)
>> +		__string(procedure, rqstp->rq_procinfo->pc_name)
>> +	),
>> +	TP_fast_assign(
>> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
>> +		__entry->version = rqstp->rq_vers;
>> +		__entry->status = be32_to_cpu(status);
>> +		__assign_str(procedure, rqstp->rq_procinfo->pc_name);
>> +	),
>> +	TP_printk("xid=0x%08x version=%u procedure=%s status=%s",
>> +		__entry->xid, __entry->version, __get_str(procedure),
>> +		show_nfs_status(__entry->status)
>> +	)
>> +);
>> +
>> TRACE_EVENT(nfsd_access,
>> 	TP_PROTO(
>> 		const struct svc_rqst *rqstp,
>> 

--
Chuck Lever
J. Bruce Fields Sept. 25, 2020, 2:17 p.m. UTC | #3
On Fri, Sep 25, 2020 at 09:59:54AM -0400, Chuck Lever wrote:
> 
> 
> > On Sep 24, 2020, at 7:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > 
> > On Mon, Sep 21, 2020 at 02:13:07PM -0400, Chuck Lever wrote:
> >> This is follow-on work to the tracepoints added in the NFS server's
> >> duplicate reply cache. Here, tracepoints are introduced that report
> >> replies from cache as well as encoding and decoding errors.
> >> 
> >> The NFSv2, v3, and v4 dispatcher requirements have diverged over
> >> time, leaving us with a little bit of technical debt. In addition,
> >> I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
> >> nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
> >> conditional branches from this hot path helps optimize CPU
> >> utilization. So, I've duplicated the nfsd_dispatcher function.
> > 
> > Comparing current nfsd_dispatch to the nfsv2/v3 nfsd_dispatch: the only
> > thing I spotted removed from the v2/v3-specific dispatch is the
> > rq_lease_breaker = NULL.  (I think that's not correct, actually.  We
> > could remove the need for that to be set in the v2/v3 case, but with the
> > current code it does need to be set.)
> 
> Noted with thanks.

Maybe just do this?

--b.

commit c7265a111269
Author: J. Bruce Fields <bfields@redhat.com>
Date:   Fri Sep 25 10:12:39 2020 -0400

    nfsd: rq_lease_breaker cleanup
    
    Since only the v4 code cares about it, maybe it's better to leave
    rq_lease_breaker out of the common dispatch code?
    
    Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 62afcae18e17..a498278ba96b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4597,6 +4597,9 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
 
 	if (!i_am_nfsd())
 		return NULL;
+	/* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
+	if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
+		return NULL;
 	rqst = kthread_data(current);
 	clp = *(rqst->rq_lease_breaker);
 	return dl->dl_stid.sc_client == clp;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b603dfcdd361..8d6f6f4c8b28 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1016,7 +1016,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 		*statp = rpc_garbage_args;
 		return 1;
 	}
-	rqstp->rq_lease_breaker = NULL;
 	/*
 	 * Give the xdr decoder a chance to change this if it wants
 	 * (necessary in the NFSv4.0 compound case)
Chuck Lever Sept. 25, 2020, 2:18 p.m. UTC | #4
> On Sep 25, 2020, at 9:59 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> 
> 
> 
>> On Sep 24, 2020, at 7:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
>> 
>> On Mon, Sep 21, 2020 at 02:13:07PM -0400, Chuck Lever wrote:
>>> This is follow-on work to the tracepoints added in the NFS server's
>>> duplicate reply cache. Here, tracepoints are introduced that report
>>> replies from cache as well as encoding and decoding errors.
>>> 
>>> The NFSv2, v3, and v4 dispatcher requirements have diverged over
>>> time, leaving us with a little bit of technical debt. In addition,
>>> I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
>>> nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
>>> conditional branches from this hot path helps optimize CPU
>>> utilization. So, I've duplicated the nfsd_dispatcher function.
>> 
>> Comparing current nfsd_dispatch to the nfsv2/v3 nfsd_dispatch: the only
>> thing I spotted removed from the v2/v3-specific dispatch is the
>> rq_lease_breaker = NULL.  (I think that's not correct, actually.  We
>> could remove the need for that to be set in the v2/v3 case, but with the
>> current code it does need to be set.)
> 
> Noted with thanks.
> 
> 
>> Comparing current nfsd_dispatch to the nfsv4 nfsd4_dispatch, the
>> v4-specific dispatch does away with nfs_request_too_big() and the
>> v2-specific shortcut in the error encoding case.
>> 
>> So these still look *very* similar.  I don't feel like we're getting a
>> lot of benefit out of splitting these out.
> 
> I don't disagree with that at all. At this point I'm just noodling
> to see what's possible. I'm now toying with other ways to add high-
> value tracing in the legacy ULPs. In the end I might end up avoiding
> significant changes in the dispatchers in order to add tracing.
> 
> However, a few thoughts I had while learning how the dispatcher
> code works.
> 
> There are some opportunities for reducing instruction path length
> and the number of conditional branches in here. It's a hot path,
> so I think we should consider some careful micro-optimizations
> even if they don't add significant new features or do add some
> code duplication.
> 
> In user space, the library (iirc) assumes each ULP provides it's
> own dispatcher function. I'd consider duplicating and removing
> svc_generic_dispatcher() to simplify the pasta in svc_process(),
> again as a micro-optimization and for better code legibility.
> 
> lockd's pc_func returns an RPC accept_stat, but the NFSD pc_func
> methods return an NFS status. The latter feels like a layering
> violation for the sake of reducing a small amount of code
> duplication. I'd rather see encoding of the NFS status handled in
> the NFS Reply encoders, since that is an XDR function, and because
> that logic seems slightly different for NFSv2, support for which
> we'd like to deprecate at some point.

I misremembered this. nfsd_dispatch simply doesn't set *statp
in the success case, which seems strange.


> Note also that *statp in nfsd_dispatch is never explicitly set to
> rpc_success in the normal execution flow. It relies on the
> equivalence of rpc_success and nfs_ok, which is convenient, but
> confusing to read. It might be cleaner if *statp was made an enum
> to make it explicit what set of values go in that return variable.
> 
> 
>> By the way, the combination of copying a bunch of code, doing some
>> common cleanup, and dropping version-specific stuff makes it a little
>> hard to sort out what's going on.  If it were me, I'd do this as:
>> 
>> 	- one patch to do common cleanup (dropping some redundant
>> 	  comments, using argv/resv variables to cleanup calculations,
>> 	  etc.)
>> 	- a second patch that just duplicates the one nfsd_dispatch into
>> 	  nfsd_dispatch and nfsd4_dispatch
>> 	- a third patch that just removes the stuff we don't need from
>> 	  the newly duplicated dispatchers.
>> 
>> and then it'd be obvious what's changed.
> 
> Good points. The series is still immature, and I tend to maintain
> larger checkpoint patches that get broken down over time to make
> it more obvious what is changing and why. I'll keep your comments
> in mind as this work evolves. Feel free to make similar suggestions
> about my future work.
> 
> 
>> --b.
>> 
>>> 
>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>> ---
>>> fs/nfsd/nfs2acl.c  |    2 -
>>> fs/nfsd/nfs3acl.c  |    2 -
>>> fs/nfsd/nfs4proc.c |    2 -
>>> fs/nfsd/nfsd.h     |    1 
>>> fs/nfsd/nfssvc.c   |  198 ++++++++++++++++++++++++++++++++++------------------
>>> fs/nfsd/trace.h    |   50 +++++++++++++
>>> 6 files changed, 183 insertions(+), 72 deletions(-)
>>> 
>>> diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
>>> index 54e597918822..894b8f0594e2 100644
>>> --- a/fs/nfsd/nfs2acl.c
>>> +++ b/fs/nfsd/nfs2acl.c
>>> @@ -416,6 +416,6 @@ const struct svc_version nfsd_acl_version2 = {
>>> 	.vs_nproc	= 5,
>>> 	.vs_proc	= nfsd_acl_procedures2,
>>> 	.vs_count	= nfsd_acl_count2,
>>> -	.vs_dispatch	= nfsd_dispatch,
>>> +	.vs_dispatch	= nfsd4_dispatch,
>>> 	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
>>> };
>>> diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
>>> index 7f512dec7460..924ebb19c59c 100644
>>> --- a/fs/nfsd/nfs3acl.c
>>> +++ b/fs/nfsd/nfs3acl.c
>>> @@ -282,7 +282,7 @@ const struct svc_version nfsd_acl_version3 = {
>>> 	.vs_nproc	= 3,
>>> 	.vs_proc	= nfsd_acl_procedures3,
>>> 	.vs_count	= nfsd_acl_count3,
>>> -	.vs_dispatch	= nfsd_dispatch,
>>> +	.vs_dispatch	= nfsd4_dispatch,
>>> 	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
>>> };
>>> 
>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>> index 49109645af24..61302641f651 100644
>>> --- a/fs/nfsd/nfs4proc.c
>>> +++ b/fs/nfsd/nfs4proc.c
>>> @@ -3320,7 +3320,7 @@ const struct svc_version nfsd_version4 = {
>>> 	.vs_nproc		= 2,
>>> 	.vs_proc		= nfsd_procedures4,
>>> 	.vs_count		= nfsd_count3,
>>> -	.vs_dispatch		= nfsd_dispatch,
>>> +	.vs_dispatch		= nfsd4_dispatch,
>>> 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
>>> 	.vs_rpcb_optnl		= true,
>>> 	.vs_need_cong_ctrl	= true,
>>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>>> index cb742e17e04a..7fa4b19dd2f7 100644
>>> --- a/fs/nfsd/nfsd.h
>>> +++ b/fs/nfsd/nfsd.h
>>> @@ -78,6 +78,7 @@ extern const struct seq_operations nfs_exports_op;
>>> */
>>> int		nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
>>> int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
>>> +int		nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp);
>>> 
>>> int		nfsd_nrthreads(struct net *);
>>> int		nfsd_nrpools(struct net *);
>>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>>> index f7f6473578af..d626eea1c78a 100644
>>> --- a/fs/nfsd/nfssvc.c
>>> +++ b/fs/nfsd/nfssvc.c
>>> @@ -28,6 +28,7 @@
>>> #include "vfs.h"
>>> #include "netns.h"
>>> #include "filecache.h"
>>> +#include "trace.h"
>>> 
>>> #define NFSDDBG_FACILITY	NFSDDBG_SVC
>>> 
>>> @@ -964,7 +965,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
>>> {
>>> 	if (nfserr == nfserr_jukebox && vers == 2)
>>> 		return nfserr_dropit;
>>> -	if (nfserr == nfserr_wrongsec && vers < 4)
>>> +	if (nfserr == nfserr_wrongsec)
>>> 		return nfserr_acces;
>>> 	return nfserr;
>>> }
>>> @@ -980,18 +981,6 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
>>> static bool nfs_request_too_big(struct svc_rqst *rqstp,
>>> 				const struct svc_procedure *proc)
>>> {
>>> -	/*
>>> -	 * The ACL code has more careful bounds-checking and is not
>>> -	 * susceptible to this problem:
>>> -	 */
>>> -	if (rqstp->rq_prog != NFS_PROGRAM)
>>> -		return false;
>>> -	/*
>>> -	 * Ditto NFSv4 (which can in theory have argument and reply both
>>> -	 * more than a page):
>>> -	 */
>>> -	if (rqstp->rq_vers >= 4)
>>> -		return false;
>>> 	/* The reply will be small, we're OK: */
>>> 	if (proc->pc_xdrressize > 0 &&
>>> 	    proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
>>> @@ -1000,81 +989,152 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
>>> 	return rqstp->rq_arg.len > PAGE_SIZE;
>>> }
>>> 
>>> -int
>>> -nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>>> +/**
>>> + * nfsd_dispatch - Process an NFSv2 or NFSv3 request
>>> + * @rqstp: incoming NFS request
>>> + * @statp: OUT: RPC accept_stat value
>>> + *
>>> + * Return values:
>>> + *  %0: Processing complete; do not send a Reply
>>> + *  %1: Processing complete; send a Reply
>>> + */
>>> +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>>> {
>>> -	const struct svc_procedure *proc;
>>> -	__be32			nfserr;
>>> -	__be32			*nfserrp;
>>> -
>>> -	dprintk("nfsd_dispatch: vers %d proc %d\n",
>>> -				rqstp->rq_vers, rqstp->rq_proc);
>>> -	proc = rqstp->rq_procinfo;
>>> -
>>> -	if (nfs_request_too_big(rqstp, proc)) {
>>> -		dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
>>> -		*statp = rpc_garbage_args;
>>> -		return 1;
>>> +	const struct svc_procedure *proc = rqstp->rq_procinfo;
>>> +	struct kvec *argv = &rqstp->rq_arg.head[0];
>>> +	struct kvec *resv = &rqstp->rq_res.head[0];
>>> +	__be32 nfserr, *nfserrp;
>>> +
>>> +	if (nfs_request_too_big(rqstp, proc))
>>> +		goto out_too_large;
>>> +
>>> +	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
>>> +		goto out_decode_err;
>>> +
>>> +	rqstp->rq_cachetype = proc->pc_cachetype;
>>> +	switch (nfsd_cache_lookup(rqstp)) {
>>> +	case RC_DROPIT:
>>> +		goto out_dropit;
>>> +	case RC_REPLY:
>>> +		goto out_cached_reply;
>>> +	case RC_DOIT:
>>> +		break;
>>> 	}
>>> -	rqstp->rq_lease_breaker = NULL;
>>> +
>>> +	nfserrp = resv->iov_base + resv->iov_len;
>>> +	resv->iov_len += sizeof(__be32);
>>> +	nfserr = proc->pc_func(rqstp);
>>> +	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
>>> +	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags))
>>> +		goto out_update_drop;
>>> +	if (rqstp->rq_proc)
>>> +		*nfserrp++ = nfserr;
>>> +
>>> +	/* For NFSv2, additional info is never returned in case of an error. */
>>> +	if (!(nfserr && rqstp->rq_vers == 2))
>>> +		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
>>> +			goto out_encode_err;
>>> +
>>> +	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
>>> +	trace_nfsd_svc_status(rqstp, proc, nfserr);
>>> +	return 1;
>>> +
>>> +out_too_large:
>>> +	trace_nfsd_svc_too_large_err(rqstp);
>>> +	*statp = rpc_garbage_args;
>>> +	return 1;
>>> +
>>> +out_decode_err:
>>> +	trace_nfsd_svc_decode_err(rqstp);
>>> +	*statp = rpc_garbage_args;
>>> +	return 1;
>>> +
>>> +out_update_drop:
>>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>>> +out_dropit:
>>> +	trace_nfsd_svc_dropit(rqstp);
>>> +	return 0;
>>> +
>>> +out_cached_reply:
>>> +	trace_nfsd_svc_cached_reply(rqstp);
>>> +	return 1;
>>> +
>>> +out_encode_err:
>>> +	trace_nfsd_svc_encode_err(rqstp);
>>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>>> +	*statp = rpc_system_err;
>>> +	return 1;
>>> +}
>>> +
>>> +/**
>>> + * nfsd4_dispatch - Process an NFSv4 or NFSACL request
>>> + * @rqstp: incoming NFS request
>>> + * @statp: OUT: RPC accept_stat value
>>> + *
>>> + * Return values:
>>> + *  %0: Processing complete; do not send a Reply
>>> + *  %1: Processing complete; send a Reply
>>> + */
>>> +int nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp)
>>> +{
>>> +	const struct svc_procedure *proc = rqstp->rq_procinfo;
>>> +	struct kvec *argv = &rqstp->rq_arg.head[0];
>>> +	struct kvec *resv = &rqstp->rq_res.head[0];
>>> +	__be32 nfserr, *nfserrp;
>>> +
>>> 	/*
>>> 	 * Give the xdr decoder a chance to change this if it wants
>>> 	 * (necessary in the NFSv4.0 compound case)
>>> 	 */
>>> 	rqstp->rq_cachetype = proc->pc_cachetype;
>>> -	/* Decode arguments */
>>> -	if (proc->pc_decode &&
>>> -	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
>>> -		dprintk("nfsd: failed to decode arguments!\n");
>>> -		*statp = rpc_garbage_args;
>>> -		return 1;
>>> -	}
>>> +	rqstp->rq_lease_breaker = NULL;
>>> +
>>> +	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
>>> +		goto out_decode_err;
>>> 
>>> -	/* Check whether we have this call in the cache. */
>>> 	switch (nfsd_cache_lookup(rqstp)) {
>>> 	case RC_DROPIT:
>>> -		return 0;
>>> +		goto out_dropit;
>>> 	case RC_REPLY:
>>> -		return 1;
>>> -	case RC_DOIT:;
>>> -		/* do it */
>>> +		goto out_cached_reply;
>>> +	case RC_DOIT:
>>> +		break;
>>> 	}
>>> 
>>> -	/* need to grab the location to store the status, as
>>> -	 * nfsv4 does some encoding while processing 
>>> -	 */
>>> -	nfserrp = rqstp->rq_res.head[0].iov_base
>>> -		+ rqstp->rq_res.head[0].iov_len;
>>> -	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
>>> -
>>> -	/* Now call the procedure handler, and encode NFS status. */
>>> +	nfserrp = resv->iov_base + resv->iov_len;
>>> +	resv->iov_len += sizeof(__be32);
>>> 	nfserr = proc->pc_func(rqstp);
>>> -	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
>>> -	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
>>> -		dprintk("nfsd: Dropping request; may be revisited later\n");
>>> -		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>>> -		return 0;
>>> -	}
>>> -
>>> -	if (rqstp->rq_proc != 0)
>>> +	if (test_bit(RQ_DROPME, &rqstp->rq_flags))
>>> +		goto out_update_drop;
>>> +	if (rqstp->rq_proc)
>>> 		*nfserrp++ = nfserr;
>>> 
>>> -	/* Encode result.
>>> -	 * For NFSv2, additional info is never returned in case of an error.
>>> -	 */
>>> -	if (!(nfserr && rqstp->rq_vers == 2)) {
>>> -		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
>>> -			/* Failed to encode result. Release cache entry */
>>> -			dprintk("nfsd: failed to encode result!\n");
>>> -			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>>> -			*statp = rpc_system_err;
>>> -			return 1;
>>> -		}
>>> -	}
>>> +	if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
>>> +		goto out_encode_err;
>>> 
>>> -	/* Store reply in cache. */
>>> 	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
>>> 	return 1;
>>> +
>>> +out_decode_err:
>>> +	trace_nfsd_svc_decode_err(rqstp);
>>> +	*statp = rpc_garbage_args;
>>> +	return 1;
>>> +
>>> +out_update_drop:
>>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>>> +out_dropit:
>>> +	trace_nfsd_svc_dropit(rqstp);
>>> +	return 0;
>>> +
>>> +out_cached_reply:
>>> +	trace_nfsd_svc_cached_reply(rqstp);
>>> +	return 1;
>>> +
>>> +out_encode_err:
>>> +	trace_nfsd_svc_encode_err(rqstp);
>>> +	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
>>> +	*statp = rpc_system_err;
>>> +	return 1;
>>> }
>>> 
>>> int nfsd_pool_stats_open(struct inode *inode, struct file *file)
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 53115fbc00b2..50ab4a84c25f 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -32,6 +32,56 @@
>>> 		{ NFSD_MAY_READ_IF_EXEC,	"READ_IF_EXEC" },	\
>>> 		{ NFSD_MAY_64BIT_COOKIE,	"64BIT_COOKIE" })
>>> 
>>> +DECLARE_EVENT_CLASS(nfsd_simple_class,
>>> +	TP_PROTO(
>>> +		const struct svc_rqst *rqstp
>>> +	),
>>> +	TP_ARGS(rqstp),
>>> +	TP_STRUCT__entry(
>>> +		__field(u32, xid)
>>> +	),
>>> +	TP_fast_assign(
>>> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
>>> +	),
>>> +	TP_printk("xid=0x%08x", __entry->xid)
>>> +);
>>> +
>>> +#define DEFINE_NFSD_SIMPLE_EVENT(name)			\
>>> +DEFINE_EVENT(nfsd_simple_class, nfsd_##name,		\
>>> +	TP_PROTO(const struct svc_rqst *rqstp),		\
>>> +	TP_ARGS(rqstp))
>>> +
>>> +DEFINE_NFSD_SIMPLE_EVENT(svc_too_large_err);
>>> +DEFINE_NFSD_SIMPLE_EVENT(svc_decode_err);
>>> +DEFINE_NFSD_SIMPLE_EVENT(svc_dropit);
>>> +DEFINE_NFSD_SIMPLE_EVENT(svc_cached_reply);
>>> +DEFINE_NFSD_SIMPLE_EVENT(svc_encode_err);
>>> +
>>> +TRACE_EVENT(nfsd_svc_status,
>>> +	TP_PROTO(
>>> +		const struct svc_rqst *rqstp,
>>> +		const struct svc_procedure *proc,
>>> +		__be32 status
>>> +	),
>>> +	TP_ARGS(rqstp, proc, status),
>>> +	TP_STRUCT__entry(
>>> +		__field(u32, xid)
>>> +		__field(u32, version)
>>> +		__field(unsigned long, status)
>>> +		__string(procedure, rqstp->rq_procinfo->pc_name)
>>> +	),
>>> +	TP_fast_assign(
>>> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
>>> +		__entry->version = rqstp->rq_vers;
>>> +		__entry->status = be32_to_cpu(status);
>>> +		__assign_str(procedure, rqstp->rq_procinfo->pc_name);
>>> +	),
>>> +	TP_printk("xid=0x%08x version=%u procedure=%s status=%s",
>>> +		__entry->xid, __entry->version, __get_str(procedure),
>>> +		show_nfs_status(__entry->status)
>>> +	)
>>> +);
>>> +
>>> TRACE_EVENT(nfsd_access,
>>> 	TP_PROTO(
>>> 		const struct svc_rqst *rqstp,
>>> 
> 
> --
> Chuck Lever

--
Chuck Lever
Chuck Lever Sept. 25, 2020, 2:21 p.m. UTC | #5
> On Sep 25, 2020, at 10:17 AM, Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Fri, Sep 25, 2020 at 09:59:54AM -0400, Chuck Lever wrote:
>> 
>> 
>>> On Sep 24, 2020, at 7:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
>>> 
>>> On Mon, Sep 21, 2020 at 02:13:07PM -0400, Chuck Lever wrote:
>>>> This is follow-on work to the tracepoints added in the NFS server's
>>>> duplicate reply cache. Here, tracepoints are introduced that report
>>>> replies from cache as well as encoding and decoding errors.
>>>> 
>>>> The NFSv2, v3, and v4 dispatcher requirements have diverged over
>>>> time, leaving us with a little bit of technical debt. In addition,
>>>> I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
>>>> nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
>>>> conditional branches from this hot path helps optimize CPU
>>>> utilization. So, I've duplicated the nfsd_dispatcher function.
>>> 
>>> Comparing current nfsd_dispatch to the nfsv2/v3 nfsd_dispatch: the only
>>> thing I spotted removed from the v2/v3-specific dispatch is the
>>> rq_lease_breaker = NULL.  (I think that's not correct, actually.  We
>>> could remove the need for that to be set in the v2/v3 case, but with the
>>> current code it does need to be set.)
>> 
>> Noted with thanks.
> 
> Maybe just do this?

I'll try it out. Thanks!


> --b.
> 
> commit c7265a111269
> Author: J. Bruce Fields <bfields@redhat.com>
> Date:   Fri Sep 25 10:12:39 2020 -0400
> 
>    nfsd: rq_lease_breaker cleanup
> 
>    Since only the v4 code cares about it, maybe it's better to leave
>    rq_lease_breaker out of the common dispatch code?
> 
>    Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 62afcae18e17..a498278ba96b 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4597,6 +4597,9 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
> 
> 	if (!i_am_nfsd())
> 		return NULL;
> +	/* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
> +	if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
> +		return NULL;
> 	rqst = kthread_data(current);
> 	clp = *(rqst->rq_lease_breaker);
> 	return dl->dl_stid.sc_client == clp;
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index b603dfcdd361..8d6f6f4c8b28 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -1016,7 +1016,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
> 		*statp = rpc_garbage_args;
> 		return 1;
> 	}
> -	rqstp->rq_lease_breaker = NULL;
> 	/*
> 	 * Give the xdr decoder a chance to change this if it wants
> 	 * (necessary in the NFSv4.0 compound case)

--
Chuck Lever
J. Bruce Fields Sept. 25, 2020, 2:47 p.m. UTC | #6
On Fri, Sep 25, 2020 at 09:59:54AM -0400, Chuck Lever wrote:
> 
> 
> > On Sep 24, 2020, at 7:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > 
> > On Mon, Sep 21, 2020 at 02:13:07PM -0400, Chuck Lever wrote:
> >> This is follow-on work to the tracepoints added in the NFS server's
> >> duplicate reply cache. Here, tracepoints are introduced that report
> >> replies from cache as well as encoding and decoding errors.
> >> 
> >> The NFSv2, v3, and v4 dispatcher requirements have diverged over
> >> time, leaving us with a little bit of technical debt. In addition,
> >> I wanted to add a tracepoint for NFSv2 and NFSv3 similar to the
> >> nfsd4_compound/compoundstatus tracepoints. Lastly, removing some
> >> conditional branches from this hot path helps optimize CPU
> >> utilization. So, I've duplicated the nfsd_dispatcher function.
> > 
> > Comparing current nfsd_dispatch to the nfsv2/v3 nfsd_dispatch: the only
> > thing I spotted removed from the v2/v3-specific dispatch is the
> > rq_lease_breaker = NULL.  (I think that's not correct, actually.  We
> > could remove the need for that to be set in the v2/v3 case, but with the
> > current code it does need to be set.)
> 
> Noted with thanks.
> 
> 
> > Comparing current nfsd_dispatch to the nfsv4 nfsd4_dispatch, the
> > v4-specific dispatch does away with nfs_request_too_big() and the
> > v2-specific shortcut in the error encoding case.
> > 
> > So these still look *very* similar.  I don't feel like we're getting a
> > lot of benefit out of splitting these out.
> 
> I don't disagree with that at all. At this point I'm just noodling
> to see what's possible. I'm now toying with other ways to add high-
> value tracing in the legacy ULPs. In the end I might end up avoiding
> significant changes in the dispatchers in order to add tracing.

OK.

> However, a few thoughts I had while learning how the dispatcher
> code works.
> 
> There are some opportunities for reducing instruction path length
> and the number of conditional branches in here. It's a hot path,
> so I think we should consider some careful micro-optimizations
> even if they don't add significant new features or do add some
> code duplication.
> 
> In user space, the library (iirc) assumes each ULP provides it's
> own dispatcher function. I'd consider duplicating and removing
> svc_generic_dispatcher() to simplify the pasta in svc_process(),
> again as a micro-optimization and for better code legibility.

Not sure you even have to duplicate it, just export the generic
dispatcher and let individual programs point to it, right?

> lockd's pc_func returns an RPC accept_stat, but the NFSD pc_func
> methods return an NFS status. The latter feels like a layering
> violation for the sake of reducing a small amount of code
> duplication. I'd rather see encoding of the NFS status handled in
> the NFS Reply encoders, since that is an XDR function, and because
> that logic seems slightly different for NFSv2, support for which
> we'd like to deprecate at some point.
>
> Note also that *statp in nfsd_dispatch is never explicitly set to
> rpc_success in the normal execution flow. It relies on the
> equivalence of rpc_success and nfs_ok, which is convenient, but
> confusing to read. It might be cleaner if *statp was made an enum
> to make it explicit what set of values go in that return variable.

OK.

--b.
diff mbox series

Patch

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 54e597918822..894b8f0594e2 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -416,6 +416,6 @@  const struct svc_version nfsd_acl_version2 = {
 	.vs_nproc	= 5,
 	.vs_proc	= nfsd_acl_procedures2,
 	.vs_count	= nfsd_acl_count2,
-	.vs_dispatch	= nfsd_dispatch,
+	.vs_dispatch	= nfsd4_dispatch,
 	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 7f512dec7460..924ebb19c59c 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -282,7 +282,7 @@  const struct svc_version nfsd_acl_version3 = {
 	.vs_nproc	= 3,
 	.vs_proc	= nfsd_acl_procedures3,
 	.vs_count	= nfsd_acl_count3,
-	.vs_dispatch	= nfsd_dispatch,
+	.vs_dispatch	= nfsd4_dispatch,
 	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 49109645af24..61302641f651 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -3320,7 +3320,7 @@  const struct svc_version nfsd_version4 = {
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
 	.vs_count		= nfsd_count3,
-	.vs_dispatch		= nfsd_dispatch,
+	.vs_dispatch		= nfsd4_dispatch,
 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
 	.vs_rpcb_optnl		= true,
 	.vs_need_cong_ctrl	= true,
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index cb742e17e04a..7fa4b19dd2f7 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -78,6 +78,7 @@  extern const struct seq_operations nfs_exports_op;
  */
 int		nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
 int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+int		nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp);
 
 int		nfsd_nrthreads(struct net *);
 int		nfsd_nrpools(struct net *);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index f7f6473578af..d626eea1c78a 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -28,6 +28,7 @@ 
 #include "vfs.h"
 #include "netns.h"
 #include "filecache.h"
+#include "trace.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -964,7 +965,7 @@  static __be32 map_new_errors(u32 vers, __be32 nfserr)
 {
 	if (nfserr == nfserr_jukebox && vers == 2)
 		return nfserr_dropit;
-	if (nfserr == nfserr_wrongsec && vers < 4)
+	if (nfserr == nfserr_wrongsec)
 		return nfserr_acces;
 	return nfserr;
 }
@@ -980,18 +981,6 @@  static __be32 map_new_errors(u32 vers, __be32 nfserr)
 static bool nfs_request_too_big(struct svc_rqst *rqstp,
 				const struct svc_procedure *proc)
 {
-	/*
-	 * The ACL code has more careful bounds-checking and is not
-	 * susceptible to this problem:
-	 */
-	if (rqstp->rq_prog != NFS_PROGRAM)
-		return false;
-	/*
-	 * Ditto NFSv4 (which can in theory have argument and reply both
-	 * more than a page):
-	 */
-	if (rqstp->rq_vers >= 4)
-		return false;
 	/* The reply will be small, we're OK: */
 	if (proc->pc_xdrressize > 0 &&
 	    proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
@@ -1000,81 +989,152 @@  static bool nfs_request_too_big(struct svc_rqst *rqstp,
 	return rqstp->rq_arg.len > PAGE_SIZE;
 }
 
-int
-nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+/**
+ * nfsd_dispatch - Process an NFSv2 or NFSv3 request
+ * @rqstp: incoming NFS request
+ * @statp: OUT: RPC accept_stat value
+ *
+ * Return values:
+ *  %0: Processing complete; do not send a Reply
+ *  %1: Processing complete; send a Reply
+ */
+int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
-	const struct svc_procedure *proc;
-	__be32			nfserr;
-	__be32			*nfserrp;
-
-	dprintk("nfsd_dispatch: vers %d proc %d\n",
-				rqstp->rq_vers, rqstp->rq_proc);
-	proc = rqstp->rq_procinfo;
-
-	if (nfs_request_too_big(rqstp, proc)) {
-		dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
-		*statp = rpc_garbage_args;
-		return 1;
+	const struct svc_procedure *proc = rqstp->rq_procinfo;
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	__be32 nfserr, *nfserrp;
+
+	if (nfs_request_too_big(rqstp, proc))
+		goto out_too_large;
+
+	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
+		goto out_decode_err;
+
+	rqstp->rq_cachetype = proc->pc_cachetype;
+	switch (nfsd_cache_lookup(rqstp)) {
+	case RC_DROPIT:
+		goto out_dropit;
+	case RC_REPLY:
+		goto out_cached_reply;
+	case RC_DOIT:
+		break;
 	}
-	rqstp->rq_lease_breaker = NULL;
+
+	nfserrp = resv->iov_base + resv->iov_len;
+	resv->iov_len += sizeof(__be32);
+	nfserr = proc->pc_func(rqstp);
+	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
+	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags))
+		goto out_update_drop;
+	if (rqstp->rq_proc)
+		*nfserrp++ = nfserr;
+
+	/* For NFSv2, additional info is never returned in case of an error. */
+	if (!(nfserr && rqstp->rq_vers == 2))
+		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
+			goto out_encode_err;
+
+	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+	trace_nfsd_svc_status(rqstp, proc, nfserr);
+	return 1;
+
+out_too_large:
+	trace_nfsd_svc_too_large_err(rqstp);
+	*statp = rpc_garbage_args;
+	return 1;
+
+out_decode_err:
+	trace_nfsd_svc_decode_err(rqstp);
+	*statp = rpc_garbage_args;
+	return 1;
+
+out_update_drop:
+	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+out_dropit:
+	trace_nfsd_svc_dropit(rqstp);
+	return 0;
+
+out_cached_reply:
+	trace_nfsd_svc_cached_reply(rqstp);
+	return 1;
+
+out_encode_err:
+	trace_nfsd_svc_encode_err(rqstp);
+	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+	*statp = rpc_system_err;
+	return 1;
+}
+
+/**
+ * nfsd4_dispatch - Process an NFSv4 or NFSACL request
+ * @rqstp: incoming NFS request
+ * @statp: OUT: RPC accept_stat value
+ *
+ * Return values:
+ *  %0: Processing complete; do not send a Reply
+ *  %1: Processing complete; send a Reply
+ */
+int nfsd4_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+{
+	const struct svc_procedure *proc = rqstp->rq_procinfo;
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	__be32 nfserr, *nfserrp;
+
 	/*
 	 * Give the xdr decoder a chance to change this if it wants
 	 * (necessary in the NFSv4.0 compound case)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
-	/* Decode arguments */
-	if (proc->pc_decode &&
-	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
-		dprintk("nfsd: failed to decode arguments!\n");
-		*statp = rpc_garbage_args;
-		return 1;
-	}
+	rqstp->rq_lease_breaker = NULL;
+
+	if (proc->pc_decode && !procp->pc_decode(rqstp, argv->iov_base))
+		goto out_decode_err;
 
-	/* Check whether we have this call in the cache. */
 	switch (nfsd_cache_lookup(rqstp)) {
 	case RC_DROPIT:
-		return 0;
+		goto out_dropit;
 	case RC_REPLY:
-		return 1;
-	case RC_DOIT:;
-		/* do it */
+		goto out_cached_reply;
+	case RC_DOIT:
+		break;
 	}
 
-	/* need to grab the location to store the status, as
-	 * nfsv4 does some encoding while processing 
-	 */
-	nfserrp = rqstp->rq_res.head[0].iov_base
-		+ rqstp->rq_res.head[0].iov_len;
-	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
-
-	/* Now call the procedure handler, and encode NFS status. */
+	nfserrp = resv->iov_base + resv->iov_len;
+	resv->iov_len += sizeof(__be32);
 	nfserr = proc->pc_func(rqstp);
-	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
-	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
-		dprintk("nfsd: Dropping request; may be revisited later\n");
-		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
-		return 0;
-	}
-
-	if (rqstp->rq_proc != 0)
+	if (test_bit(RQ_DROPME, &rqstp->rq_flags))
+		goto out_update_drop;
+	if (rqstp->rq_proc)
 		*nfserrp++ = nfserr;
 
-	/* Encode result.
-	 * For NFSv2, additional info is never returned in case of an error.
-	 */
-	if (!(nfserr && rqstp->rq_vers == 2)) {
-		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
-			/* Failed to encode result. Release cache entry */
-			dprintk("nfsd: failed to encode result!\n");
-			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
-			*statp = rpc_system_err;
-			return 1;
-		}
-	}
+	if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp))
+		goto out_encode_err;
 
-	/* Store reply in cache. */
 	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
 	return 1;
+
+out_decode_err:
+	trace_nfsd_svc_decode_err(rqstp);
+	*statp = rpc_garbage_args;
+	return 1;
+
+out_update_drop:
+	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+out_dropit:
+	trace_nfsd_svc_dropit(rqstp);
+	return 0;
+
+out_cached_reply:
+	trace_nfsd_svc_cached_reply(rqstp);
+	return 1;
+
+out_encode_err:
+	trace_nfsd_svc_encode_err(rqstp);
+	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+	*statp = rpc_system_err;
+	return 1;
 }
 
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 53115fbc00b2..50ab4a84c25f 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -32,6 +32,56 @@ 
 		{ NFSD_MAY_READ_IF_EXEC,	"READ_IF_EXEC" },	\
 		{ NFSD_MAY_64BIT_COOKIE,	"64BIT_COOKIE" })
 
+DECLARE_EVENT_CLASS(nfsd_simple_class,
+	TP_PROTO(
+		const struct svc_rqst *rqstp
+	),
+	TP_ARGS(rqstp),
+	TP_STRUCT__entry(
+		__field(u32, xid)
+	),
+	TP_fast_assign(
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+	),
+	TP_printk("xid=0x%08x", __entry->xid)
+);
+
+#define DEFINE_NFSD_SIMPLE_EVENT(name)			\
+DEFINE_EVENT(nfsd_simple_class, nfsd_##name,		\
+	TP_PROTO(const struct svc_rqst *rqstp),		\
+	TP_ARGS(rqstp))
+
+DEFINE_NFSD_SIMPLE_EVENT(svc_too_large_err);
+DEFINE_NFSD_SIMPLE_EVENT(svc_decode_err);
+DEFINE_NFSD_SIMPLE_EVENT(svc_dropit);
+DEFINE_NFSD_SIMPLE_EVENT(svc_cached_reply);
+DEFINE_NFSD_SIMPLE_EVENT(svc_encode_err);
+
+TRACE_EVENT(nfsd_svc_status,
+	TP_PROTO(
+		const struct svc_rqst *rqstp,
+		const struct svc_procedure *proc,
+		__be32 status
+	),
+	TP_ARGS(rqstp, proc, status),
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(u32, version)
+		__field(unsigned long, status)
+		__string(procedure, rqstp->rq_procinfo->pc_name)
+	),
+	TP_fast_assign(
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+		__entry->version = rqstp->rq_vers;
+		__entry->status = be32_to_cpu(status);
+		__assign_str(procedure, rqstp->rq_procinfo->pc_name);
+	),
+	TP_printk("xid=0x%08x version=%u procedure=%s status=%s",
+		__entry->xid, __entry->version, __get_str(procedure),
+		show_nfs_status(__entry->status)
+	)
+);
+
 TRACE_EVENT(nfsd_access,
 	TP_PROTO(
 		const struct svc_rqst *rqstp,