diff mbox series

[1/2] nfsd: protect concurrent access to nfsd stats counters

Message ID 20201228170344.22867-2-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series Improvements to nfsd stats | expand

Commit Message

Amir Goldstein Dec. 28, 2020, 5:03 p.m. UTC
nfsd stats counters can be updated by concurrent nfsd threads without any
protection.

Convert some nfsd_stats and nfsd_net struct members to use percpu counters.

There are several members of struct nfsd_stats that are reported in file
/proc/net/rpc/nfsd by never updated. Those have been left untouched.

The longest_chain* members of struct nfsd_net remain unprotected.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/nfsd/netns.h    | 20 +++++++----
 fs/nfsd/nfs4proc.c |  2 +-
 fs/nfsd/nfscache.c | 52 +++++++++++++++++++--------
 fs/nfsd/nfsctl.c   |  5 ++-
 fs/nfsd/nfsfh.c    |  2 +-
 fs/nfsd/stats.c    | 87 ++++++++++++++++++++++++++++++++++++----------
 fs/nfsd/stats.h    | 42 +++++++++++++++-------
 fs/nfsd/vfs.c      |  4 +--
 8 files changed, 156 insertions(+), 58 deletions(-)

Comments

Chuck Lever Dec. 28, 2020, 7:53 p.m. UTC | #1
Hello Amir -

> On Dec 28, 2020, at 12:03 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> 
> nfsd stats counters can be updated by concurrent nfsd threads without any
> protection.
> 
> Convert some nfsd_stats and nfsd_net struct members to use percpu counters.
> 
> There are several members of struct nfsd_stats that are reported in file
> /proc/net/rpc/nfsd by never updated. Those have been left untouched.
> 
> The longest_chain* members of struct nfsd_net remain unprotected.

I like the idea of converting these to per-CPU variables, and the
use of standards kernel helpers is clean. I haven't looked closely
at the NFSD-specific parts of 1/2 yet.

Looking forward to Bruce and Jeff's commentary.

--
Chuck Lever
J. Bruce Fields Jan. 4, 2021, 9:55 p.m. UTC | #2
Thanks for looking at this, it's long overdue!

On Mon, Dec 28, 2020 at 07:03:43PM +0200, Amir Goldstein wrote:
> nfsd stats counters can be updated by concurrent nfsd threads without any
> protection.
> 
> Convert some nfsd_stats and nfsd_net struct members to use percpu counters.
> 
> There are several members of struct nfsd_stats that are reported in file
> /proc/net/rpc/nfsd by never updated. Those have been left untouched.

Looking through the history, the code that updated fh_lookup, for
example, was removed in 2002.

I'd be OK with removing those entirely, maybe just leave a /* deprecated
field */ comment where we printk the hard-coded 0's.  If somebody wants
to know more they can still find the answers in git.

> The longest_chain* members of struct nfsd_net remain unprotected.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/nfsd/netns.h    | 20 +++++++----
>  fs/nfsd/nfs4proc.c |  2 +-
>  fs/nfsd/nfscache.c | 52 +++++++++++++++++++--------
>  fs/nfsd/nfsctl.c   |  5 ++-
>  fs/nfsd/nfsfh.c    |  2 +-
>  fs/nfsd/stats.c    | 87 ++++++++++++++++++++++++++++++++++++----------
>  fs/nfsd/stats.h    | 42 +++++++++++++++-------
>  fs/nfsd/vfs.c      |  4 +--
>  8 files changed, 156 insertions(+), 58 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 7346acda9d76..080c5389b2e7 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -10,6 +10,7 @@
>  
>  #include <net/net_namespace.h>
>  #include <net/netns/generic.h>
> +#include <linux/percpu_counter.h>
>  
>  /* Hash tables for nfs4_clientid state */
>  #define CLIENT_HASH_BITS                 4
> @@ -149,20 +150,25 @@ struct nfsd_net {
>  
>  	/*
>  	 * Stats and other tracking of on the duplicate reply cache.
> -	 * These fields and the "rc" fields in nfsdstats are modified
> -	 * with only the per-bucket cache lock, which isn't really safe
> -	 * and should be fixed if we want the statistics to be
> -	 * completely accurate.
> +	 * The longest_chain* fields are modified with only the per-bucket
> +	 * cache lock, which isn't really safe and should be fixed if we want
> +	 * these statistics to be completely accurate.
>  	 */
>  
>  	/* total number of entries */
>  	atomic_t                 num_drc_entries;
>  
> +	/* Reference to below counters as array for init/destroy */
> +	struct percpu_counter    counters[0];

This feels slightly too clever for its own good, but....  OK, I see
there's a bunch of initializations to do in the nfsdstats case, and you
don't want to open code all that (and its error handling).  I guess I
don't have a better idea.  Is this a common pattern elsewhere?

Anyway, otherwise it all looks routine to me, thanks again.

--b.

>  	/* cache misses due only to checksum comparison failures */
> -	unsigned int             payload_misses;
> -
> +	struct percpu_counter    payload_misses;
>  	/* amount of memory (in bytes) currently consumed by the DRC */
> -	unsigned int             drc_mem_usage;
> +	struct percpu_counter    drc_mem_usage;
> +	/* End of counters array */
> +	struct percpu_counter    counters_end[0];
> +#define NFSD_NET_COUNTERS_NUM \
> +	((offsetof(struct nfsd_net, counters_end) - \
> +	  offsetof(struct nfsd_net, counters)) / sizeof(struct percpu_counter))
>  
>  	/* longest hash chain seen */
>  	unsigned int             longest_chain;
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index e83b21778816..0fa205d8ce49 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2173,7 +2173,7 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
>  static inline void nfsd4_increment_op_stats(u32 opnum)
>  {
>  	if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
> -		nfsdstats.nfs4_opcount[opnum]++;
> +		percpu_counter_inc(&nfsdstats.nfs4_opcount[opnum]);
>  }
>  
>  static const struct nfsd4_operation nfsd4_ops[];
> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
> index 80c90fc231a5..4093ab25cc4d 100644
> --- a/fs/nfsd/nfscache.c
> +++ b/fs/nfsd/nfscache.c
> @@ -121,14 +121,14 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
>  				struct nfsd_net *nn)
>  {
>  	if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
> -		nn->drc_mem_usage -= rp->c_replvec.iov_len;
> +		percpu_counter_sub(&nn->drc_mem_usage, rp->c_replvec.iov_len);
>  		kfree(rp->c_replvec.iov_base);
>  	}
>  	if (rp->c_state != RC_UNUSED) {
>  		rb_erase(&rp->c_node, &b->rb_head);
>  		list_del(&rp->c_lru);
>  		atomic_dec(&nn->num_drc_entries);
> -		nn->drc_mem_usage -= sizeof(*rp);
> +		percpu_counter_sub(&nn->drc_mem_usage, sizeof(*rp));
>  	}
>  	kmem_cache_free(drc_slab, rp);
>  }
> @@ -154,6 +154,16 @@ void nfsd_drc_slab_free(void)
>  	kmem_cache_destroy(drc_slab);
>  }
>  
> +static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
> +{
> +	return nfsd_percpu_counters_init(nn->counters, NFSD_NET_COUNTERS_NUM);
> +}
> +
> +static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
> +{
> +	nfsd_percpu_counters_destroy(nn->counters, NFSD_NET_COUNTERS_NUM);
> +}
> +
>  int nfsd_reply_cache_init(struct nfsd_net *nn)
>  {
>  	unsigned int hashsize;
> @@ -165,12 +175,16 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
>  	hashsize = nfsd_hashsize(nn->max_drc_entries);
>  	nn->maskbits = ilog2(hashsize);
>  
> +	status = nfsd_reply_cache_stats_init(nn);
> +	if (status)
> +		goto out_nomem;
> +
>  	nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
>  	nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
>  	nn->nfsd_reply_cache_shrinker.seeks = 1;
>  	status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
>  	if (status)
> -		goto out_nomem;
> +		goto out_stats_destroy;
>  
>  	nn->drc_hashtbl = kvzalloc(array_size(hashsize,
>  				sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
> @@ -186,6 +200,8 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
>  	return 0;
>  out_shrinker:
>  	unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
> +out_stats_destroy:
> +	nfsd_reply_cache_stats_destroy(nn);
>  out_nomem:
>  	printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
>  	return -ENOMEM;
> @@ -196,6 +212,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
>  	struct svc_cacherep	*rp;
>  	unsigned int i;
>  
> +	nfsd_reply_cache_stats_destroy(nn);
>  	unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
>  
>  	for (i = 0; i < nn->drc_hashsize; i++) {
> @@ -324,7 +341,7 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
>  {
>  	if (key->c_key.k_xid == rp->c_key.k_xid &&
>  	    key->c_key.k_csum != rp->c_key.k_csum) {
> -		++nn->payload_misses;
> +		percpu_counter_inc(&nn->payload_misses);
>  		trace_nfsd_drc_mismatch(nn, key, rp);
>  	}
>  
> @@ -407,7 +424,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
>  
>  	rqstp->rq_cacherep = NULL;
>  	if (type == RC_NOCACHE) {
> -		nfsdstats.rcnocache++;
> +		percpu_counter_inc(&nfsdstats.rcnocache);
>  		goto out;
>  	}
>  
> @@ -429,12 +446,12 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
>  		goto found_entry;
>  	}
>  
> -	nfsdstats.rcmisses++;
> +	percpu_counter_inc(&nfsdstats.rcmisses);
>  	rqstp->rq_cacherep = rp;
>  	rp->c_state = RC_INPROG;
>  
>  	atomic_inc(&nn->num_drc_entries);
> -	nn->drc_mem_usage += sizeof(*rp);
> +	percpu_counter_add(&nn->drc_mem_usage, sizeof(*rp));
>  
>  	/* go ahead and prune the cache */
>  	prune_bucket(b, nn);
> @@ -446,7 +463,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
>  
>  found_entry:
>  	/* We found a matching entry which is either in progress or done. */
> -	nfsdstats.rchits++;
> +	percpu_counter_inc(&nfsdstats.rchits);
>  	rtn = RC_DROPIT;
>  
>  	/* Request being processed */
> @@ -548,7 +565,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
>  		return;
>  	}
>  	spin_lock(&b->cache_lock);
> -	nn->drc_mem_usage += bufsize;
> +	percpu_counter_add(&nn->drc_mem_usage, bufsize);
>  	lru_put_end(b, rp);
>  	rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
>  	rp->c_type = cachetype;
> @@ -588,13 +605,18 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
>  
>  	seq_printf(m, "max entries:           %u\n", nn->max_drc_entries);
>  	seq_printf(m, "num entries:           %u\n",
> -			atomic_read(&nn->num_drc_entries));
> +		   atomic_read(&nn->num_drc_entries));
>  	seq_printf(m, "hash buckets:          %u\n", 1 << nn->maskbits);
> -	seq_printf(m, "mem usage:             %u\n", nn->drc_mem_usage);
> -	seq_printf(m, "cache hits:            %u\n", nfsdstats.rchits);
> -	seq_printf(m, "cache misses:          %u\n", nfsdstats.rcmisses);
> -	seq_printf(m, "not cached:            %u\n", nfsdstats.rcnocache);
> -	seq_printf(m, "payload misses:        %u\n", nn->payload_misses);
> +	seq_printf(m, "mem usage:             %lld\n",
> +		   percpu_counter_sum_positive(&nn->drc_mem_usage));
> +	seq_printf(m, "cache hits:            %lld\n",
> +		   percpu_counter_sum_positive(&nfsdstats.rchits));
> +	seq_printf(m, "cache misses:          %lld\n",
> +		   percpu_counter_sum_positive(&nfsdstats.rcmisses));
> +	seq_printf(m, "not cached:            %lld\n",
> +		   percpu_counter_sum_positive(&nfsdstats.rcnocache));
> +	seq_printf(m, "payload misses:        %lld\n",
> +		   percpu_counter_sum_positive(&nn->payload_misses));
>  	seq_printf(m, "longest chain len:     %u\n", nn->longest_chain);
>  	seq_printf(m, "cachesize at longest:  %u\n", nn->longest_chain_cachesize);
>  	return 0;
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index f6d5d783f4a4..258605ee49b8 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1534,7 +1534,9 @@ static int __init init_nfsd(void)
>  	retval = nfsd4_init_pnfs();
>  	if (retval)
>  		goto out_free_slabs;
> -	nfsd_stat_init();	/* Statistics */
> +	retval = nfsd_stat_init();	/* Statistics */
> +	if (retval)
> +		goto out_free_pnfs;
>  	retval = nfsd_drc_slab_create();
>  	if (retval)
>  		goto out_free_stat;
> @@ -1554,6 +1556,7 @@ static int __init init_nfsd(void)
>  	nfsd_drc_slab_free();
>  out_free_stat:
>  	nfsd_stat_shutdown();
> +out_free_pnfs:
>  	nfsd4_exit_pnfs();
>  out_free_slabs:
>  	nfsd4_free_slabs();
> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> index c81dbbad8792..1879758bbaa5 100644
> --- a/fs/nfsd/nfsfh.c
> +++ b/fs/nfsd/nfsfh.c
> @@ -400,7 +400,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
>  	}
>  out:
>  	if (error == nfserr_stale)
> -		nfsdstats.fh_stale++;
> +		percpu_counter_inc(&nfsdstats.fh_stale);
>  	return error;
>  }
>  
> diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
> index b1bc582b0493..7bef1e7139d7 100644
> --- a/fs/nfsd/stats.c
> +++ b/fs/nfsd/stats.c
> @@ -38,17 +38,17 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
>  {
>  	int i;
>  
> -	seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
> -		      nfsdstats.rchits,
> -		      nfsdstats.rcmisses,
> -		      nfsdstats.rcnocache,
> -		      nfsdstats.fh_stale,
> -		      nfsdstats.fh_lookup,
> -		      nfsdstats.fh_anon,
> -		      nfsdstats.fh_nocache_dir,
> -		      nfsdstats.fh_nocache_nondir,
> -		      nfsdstats.io_read,
> -		      nfsdstats.io_write);
> +	seq_printf(seq, "rc %lld %lld %lld\nfh %lld %u %u %u %u\nio %lld %lld\n",
> +		   percpu_counter_sum_positive(&nfsdstats.rchits),
> +		   percpu_counter_sum_positive(&nfsdstats.rcmisses),
> +		   percpu_counter_sum_positive(&nfsdstats.rcnocache),
> +		   percpu_counter_sum_positive(&nfsdstats.fh_stale),
> +		   nfsdstats.fh_lookup,
> +		   nfsdstats.fh_anon,
> +		   nfsdstats.fh_nocache_dir,
> +		   nfsdstats.fh_nocache_nondir,
> +		   percpu_counter_sum_positive(&nfsdstats.io_read),
> +		   percpu_counter_sum_positive(&nfsdstats.io_write));
>  	/* thread usage: */
>  	seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
>  	for (i=0; i<10; i++) {
> @@ -62,7 +62,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
>  	for (i=0; i<11; i++)
>  		seq_printf(seq, " %u", nfsdstats.ra_depth[i]);
>  	seq_putc(seq, '\n');
> -	
> +
>  	/* show my rpc info */
>  	svc_seq_show(seq, &nfsd_svcstats);
>  
> @@ -70,8 +70,10 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
>  	/* Show count for individual nfsv4 operations */
>  	/* Writing operation numbers 0 1 2 also for maintaining uniformity */
>  	seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
> -	for (i = 0; i <= LAST_NFS4_OP; i++)
> -		seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]);
> +	for (i = 0; i <= LAST_NFS4_OP; i++) {
> +		seq_printf(seq, " %lld",
> +			   percpu_counter_sum_positive(&nfsdstats.nfs4_opcount[i]));
> +	}
>  
>  	seq_putc(seq, '\n');
>  #endif
> @@ -91,14 +93,63 @@ static const struct proc_ops nfsd_proc_ops = {
>  	.proc_release	= single_release,
>  };
>  
> -void
> -nfsd_stat_init(void)
> +int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
>  {
> +	int i, err = 0;
> +
> +	for (i = 0; !err && i < num; i++)
> +		err = percpu_counter_init(&counters[i], 0, GFP_KERNEL);
> +
> +	if (!err)
> +		return 0;
> +
> +	for (; i > 0; i--)
> +		percpu_counter_destroy(&counters[i-1]);
> +
> +	return err;
> +}
> +
> +void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num)
> +{
> +	int i;
> +
> +	for (i = 0; i < num; i++)
> +		percpu_counter_set(&counters[i], 0);
> +}
> +
> +void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
> +{
> +	int i;
> +
> +	for (i = 0; i < num; i++)
> +		percpu_counter_destroy(&counters[i]);
> +}
> +
> +static int nfsd_stat_counters_init(void)
> +{
> +	return nfsd_percpu_counters_init(nfsdstats.counters, NFSD_STATS_COUNTERS_NUM);
> +}
> +
> +static void nfsd_stat_counters_destroy(void)
> +{
> +	nfsd_percpu_counters_destroy(nfsdstats.counters, NFSD_STATS_COUNTERS_NUM);
> +}
> +
> +int nfsd_stat_init(void)
> +{
> +	int err;
> +
> +	err = nfsd_stat_counters_init();
> +	if (err)
> +		return err;
> +
>  	svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
> +
> +	return 0;
>  }
>  
> -void
> -nfsd_stat_shutdown(void)
> +void nfsd_stat_shutdown(void)
>  {
> +	nfsd_stat_counters_destroy();
>  	svc_proc_unregister(&init_net, "nfsd");
>  }
> diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
> index b23fdac69820..ad52a916375e 100644
> --- a/fs/nfsd/stats.h
> +++ b/fs/nfsd/stats.h
> @@ -8,37 +8,53 @@
>  #define _NFSD_STATS_H
>  
>  #include <uapi/linux/nfsd/stats.h>
> +#include <linux/percpu_counter.h>
>  
>  
>  struct nfsd_stats {
> -	unsigned int	rchits;		/* repcache hits */
> -	unsigned int	rcmisses;	/* repcache hits */
> -	unsigned int	rcnocache;	/* uncached reqs */
> -	unsigned int	fh_stale;	/* FH stale error */
> +	/* Reference to below counters as array for init/destroy */
> +	struct percpu_counter	counters[0];
> +	struct percpu_counter   rchits;         /* repcache hits */
> +	struct percpu_counter   rcmisses;       /* repcache hits */
> +	struct percpu_counter   rcnocache;      /* uncached reqs */
> +	struct percpu_counter   fh_stale;       /* FH stale error */
> +	struct percpu_counter   io_read;        /* bytes returned to read requests */
> +	struct percpu_counter   io_write;       /* bytes passed in write requests */
> +#ifdef CONFIG_NFSD_V4
> +	/* Counters of individual nfsv4 operations */
> +	struct percpu_counter	nfs4_opcount[LAST_NFS4_OP + 1];
> +#endif
> +	/* End of array of couters */
> +	struct percpu_counter	counters_end[0];
> +#define NFSD_STATS_COUNTERS_NUM \
> +	((offsetof(struct nfsd_stats, counters_end) - \
> +	  offsetof(struct nfsd_stats, counters)) / sizeof(struct percpu_counter))
> +
> +	/* Protected by nfsd_mutex */
> +	unsigned int	th_cnt;		/* number of available threads */
> +
> +	/* Not updated at all?? */
>  	unsigned int	fh_lookup;	/* dentry cached */
>  	unsigned int	fh_anon;	/* anon file dentry returned */
>  	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
>  	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
> -	unsigned int	io_read;	/* bytes returned to read requests */
> -	unsigned int	io_write;	/* bytes passed in write requests */
> -	unsigned int	th_cnt;		/* number of available threads */
>  	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
>  					 * of available threads were in use */
>  	unsigned int	th_fullcnt;	/* number of times last free thread was used */
>  	unsigned int	ra_size;	/* size of ra cache */
>  	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
>  					 * in the cache (10percentiles). [10] = not found */
> -#ifdef CONFIG_NFSD_V4
> -	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
> -#endif
> -
>  };
>  
>  
>  extern struct nfsd_stats	nfsdstats;
> +
>  extern struct svc_stat		nfsd_svcstats;
>  
> -void	nfsd_stat_init(void);
> -void	nfsd_stat_shutdown(void);
> +int nfsd_percpu_counters_init(struct percpu_counter counters[], int num);
> +void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num);
> +void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num);
> +int nfsd_stat_init(void);
> +void nfsd_stat_shutdown(void);
>  
>  #endif /* _NFSD_STATS_H */
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 1ecaceebee13..6adb7aba2575 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -889,7 +889,7 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  			       unsigned long *count, u32 *eof, ssize_t host_err)
>  {
>  	if (host_err >= 0) {
> -		nfsdstats.io_read += host_err;
> +		percpu_counter_add(&nfsdstats.io_read, host_err);
>  		*eof = nfsd_eof_on_read(file, offset, host_err, *count);
>  		*count = host_err;
>  		fsnotify_access(file);
> @@ -1031,7 +1031,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
>  		goto out_nfserr;
>  	}
>  	*cnt = host_err;
> -	nfsdstats.io_write += *cnt;
> +	percpu_counter_add(&nfsdstats.io_write, *cnt);
>  	fsnotify_modify(file);
>  
>  	if (stable && use_wgather) {
> -- 
> 2.17.1
Amir Goldstein Jan. 4, 2021, 10:22 p.m. UTC | #3
On Mon, Jan 4, 2021 at 11:55 PM J . Bruce Fields <bfields@fieldses.org> wrote:
>
> Thanks for looking at this, it's long overdue!
>
> On Mon, Dec 28, 2020 at 07:03:43PM +0200, Amir Goldstein wrote:
> > nfsd stats counters can be updated by concurrent nfsd threads without any
> > protection.
> >
> > Convert some nfsd_stats and nfsd_net struct members to use percpu counters.
> >
> > There are several members of struct nfsd_stats that are reported in file
> > /proc/net/rpc/nfsd by never updated. Those have been left untouched.
>
> Looking through the history, the code that updated fh_lookup, for
> example, was removed in 2002.
>
> I'd be OK with removing those entirely, maybe just leave a /* deprecated
> field */ comment where we printk the hard-coded 0's.  If somebody wants
> to know more they can still find the answers in git.
>

Sure. I can send a followup patch.

> > The longest_chain* members of struct nfsd_net remain unprotected.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/nfsd/netns.h    | 20 +++++++----
> >  fs/nfsd/nfs4proc.c |  2 +-
> >  fs/nfsd/nfscache.c | 52 +++++++++++++++++++--------
> >  fs/nfsd/nfsctl.c   |  5 ++-
> >  fs/nfsd/nfsfh.c    |  2 +-
> >  fs/nfsd/stats.c    | 87 ++++++++++++++++++++++++++++++++++++----------
> >  fs/nfsd/stats.h    | 42 +++++++++++++++-------
> >  fs/nfsd/vfs.c      |  4 +--
> >  8 files changed, 156 insertions(+), 58 deletions(-)
> >
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 7346acda9d76..080c5389b2e7 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -10,6 +10,7 @@
> >
> >  #include <net/net_namespace.h>
> >  #include <net/netns/generic.h>
> > +#include <linux/percpu_counter.h>
> >
> >  /* Hash tables for nfs4_clientid state */
> >  #define CLIENT_HASH_BITS                 4
> > @@ -149,20 +150,25 @@ struct nfsd_net {
> >
> >       /*
> >        * Stats and other tracking of on the duplicate reply cache.
> > -      * These fields and the "rc" fields in nfsdstats are modified
> > -      * with only the per-bucket cache lock, which isn't really safe
> > -      * and should be fixed if we want the statistics to be
> > -      * completely accurate.
> > +      * The longest_chain* fields are modified with only the per-bucket
> > +      * cache lock, which isn't really safe and should be fixed if we want
> > +      * these statistics to be completely accurate.
> >        */
> >
> >       /* total number of entries */
> >       atomic_t                 num_drc_entries;
> >
> > +     /* Reference to below counters as array for init/destroy */
> > +     struct percpu_counter    counters[0];
>
> This feels slightly too clever for its own good, but....  OK, I see
> there's a bunch of initializations to do in the nfsdstats case, and you
> don't want to open code all that (and its error handling).  I guess I

Yeh, look at ceph_metric_init() and imagine what nfsdstats init
would look like.

> don't have a better idea.  Is this a common pattern elsewhere?
>

Sort of. Inspired by xfsstats and related macros (fs/xfs/xfs_stats.h).
I have tried several approaches and this one ended up being the
cleanest and smallest patch.

The cleaner way would be an actual percpu_counter array and
convert all callers to use enum index to array like the dqstats counters
(include/linux/quota.h), but IMO current patch is enough.

Thanks,
Amir.
J. Bruce Fields Jan. 4, 2021, 10:34 p.m. UTC | #4
On Tue, Jan 05, 2021 at 12:22:27AM +0200, Amir Goldstein wrote:
> On Mon, Jan 4, 2021 at 11:55 PM J . Bruce Fields <bfields@fieldses.org> wrote:
> >
> > Thanks for looking at this, it's long overdue!
> >
> > On Mon, Dec 28, 2020 at 07:03:43PM +0200, Amir Goldstein wrote:
> > > nfsd stats counters can be updated by concurrent nfsd threads without any
> > > protection.
> > >
> > > Convert some nfsd_stats and nfsd_net struct members to use percpu counters.
> > >
> > > There are several members of struct nfsd_stats that are reported in file
> > > /proc/net/rpc/nfsd by never updated. Those have been left untouched.
> >
> > Looking through the history, the code that updated fh_lookup, for
> > example, was removed in 2002.
> >
> > I'd be OK with removing those entirely, maybe just leave a /* deprecated
> > field */ comment where we printk the hard-coded 0's.  If somebody wants
> > to know more they can still find the answers in git.
> >
> 
> Sure. I can send a followup patch.
> 
> > > The longest_chain* members of struct nfsd_net remain unprotected.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/nfsd/netns.h    | 20 +++++++----
> > >  fs/nfsd/nfs4proc.c |  2 +-
> > >  fs/nfsd/nfscache.c | 52 +++++++++++++++++++--------
> > >  fs/nfsd/nfsctl.c   |  5 ++-
> > >  fs/nfsd/nfsfh.c    |  2 +-
> > >  fs/nfsd/stats.c    | 87 ++++++++++++++++++++++++++++++++++++----------
> > >  fs/nfsd/stats.h    | 42 +++++++++++++++-------
> > >  fs/nfsd/vfs.c      |  4 +--
> > >  8 files changed, 156 insertions(+), 58 deletions(-)
> > >
> > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > > index 7346acda9d76..080c5389b2e7 100644
> > > --- a/fs/nfsd/netns.h
> > > +++ b/fs/nfsd/netns.h
> > > @@ -10,6 +10,7 @@
> > >
> > >  #include <net/net_namespace.h>
> > >  #include <net/netns/generic.h>
> > > +#include <linux/percpu_counter.h>
> > >
> > >  /* Hash tables for nfs4_clientid state */
> > >  #define CLIENT_HASH_BITS                 4
> > > @@ -149,20 +150,25 @@ struct nfsd_net {
> > >
> > >       /*
> > >        * Stats and other tracking of on the duplicate reply cache.
> > > -      * These fields and the "rc" fields in nfsdstats are modified
> > > -      * with only the per-bucket cache lock, which isn't really safe
> > > -      * and should be fixed if we want the statistics to be
> > > -      * completely accurate.
> > > +      * The longest_chain* fields are modified with only the per-bucket
> > > +      * cache lock, which isn't really safe and should be fixed if we want
> > > +      * these statistics to be completely accurate.
> > >        */
> > >
> > >       /* total number of entries */
> > >       atomic_t                 num_drc_entries;
> > >
> > > +     /* Reference to below counters as array for init/destroy */
> > > +     struct percpu_counter    counters[0];
> >
> > This feels slightly too clever for its own good, but....  OK, I see
> > there's a bunch of initializations to do in the nfsdstats case, and you
> > don't want to open code all that (and its error handling).  I guess I
> 
> Yeh, look at ceph_metric_init() and imagine what nfsdstats init
> would look like.
> 
> > don't have a better idea.  Is this a common pattern elsewhere?
> >
> 
> Sort of. Inspired by xfsstats and related macros (fs/xfs/xfs_stats.h).
> I have tried several approaches and this one ended up being the
> cleanest and smallest patch.
> 
> The cleaner way would be an actual percpu_counter array and
> convert all callers to use enum index to array like the dqstats counters
> (include/linux/quota.h), but IMO current patch is enough.

OK, I can live with that.

--b.
Chuck Lever Jan. 5, 2021, 2:12 a.m. UTC | #5
> On Jan 4, 2021, at 5:22 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> 
> On Mon, Jan 4, 2021 at 11:55 PM J . Bruce Fields <bfields@fieldses.org> wrote:
>> 
>> Thanks for looking at this, it's long overdue!
>> 
>> On Mon, Dec 28, 2020 at 07:03:43PM +0200, Amir Goldstein wrote:
>>> nfsd stats counters can be updated by concurrent nfsd threads without any
>>> protection.
>>> 
>>> Convert some nfsd_stats and nfsd_net struct members to use percpu counters.
>>> 
>>> There are several members of struct nfsd_stats that are reported in file
>>> /proc/net/rpc/nfsd by never updated. Those have been left untouched.
>> 
>> Looking through the history, the code that updated fh_lookup, for
>> example, was removed in 2002.
>> 
>> I'd be OK with removing those entirely, maybe just leave a /* deprecated
>> field */ comment where we printk the hard-coded 0's.  If somebody wants
>> to know more they can still find the answers in git.
>> 
> 
> Sure. I can send a followup patch.
> 
>>> The longest_chain* members of struct nfsd_net remain unprotected.
>>> 
>>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>>> ---
>>> fs/nfsd/netns.h    | 20 +++++++----
>>> fs/nfsd/nfs4proc.c |  2 +-
>>> fs/nfsd/nfscache.c | 52 +++++++++++++++++++--------
>>> fs/nfsd/nfsctl.c   |  5 ++-
>>> fs/nfsd/nfsfh.c    |  2 +-
>>> fs/nfsd/stats.c    | 87 ++++++++++++++++++++++++++++++++++++----------
>>> fs/nfsd/stats.h    | 42 +++++++++++++++-------
>>> fs/nfsd/vfs.c      |  4 +--
>>> 8 files changed, 156 insertions(+), 58 deletions(-)
>>> 
>>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>>> index 7346acda9d76..080c5389b2e7 100644
>>> --- a/fs/nfsd/netns.h
>>> +++ b/fs/nfsd/netns.h
>>> @@ -10,6 +10,7 @@
>>> 
>>> #include <net/net_namespace.h>
>>> #include <net/netns/generic.h>
>>> +#include <linux/percpu_counter.h>
>>> 
>>> /* Hash tables for nfs4_clientid state */
>>> #define CLIENT_HASH_BITS                 4
>>> @@ -149,20 +150,25 @@ struct nfsd_net {
>>> 
>>>      /*
>>>       * Stats and other tracking of on the duplicate reply cache.
>>> -      * These fields and the "rc" fields in nfsdstats are modified
>>> -      * with only the per-bucket cache lock, which isn't really safe
>>> -      * and should be fixed if we want the statistics to be
>>> -      * completely accurate.
>>> +      * The longest_chain* fields are modified with only the per-bucket
>>> +      * cache lock, which isn't really safe and should be fixed if we want
>>> +      * these statistics to be completely accurate.
>>>       */
>>> 
>>>      /* total number of entries */
>>>      atomic_t                 num_drc_entries;
>>> 
>>> +     /* Reference to below counters as array for init/destroy */
>>> +     struct percpu_counter    counters[0];
>> 
>> This feels slightly too clever for its own good, but....  OK, I see
>> there's a bunch of initializations to do in the nfsdstats case, and you
>> don't want to open code all that (and its error handling).  I guess I
> 
> Yeh, look at ceph_metric_init() and imagine what nfsdstats init
> would look like.
> 
>> don't have a better idea.  Is this a common pattern elsewhere?
>> 
> 
> Sort of. Inspired by xfsstats and related macros (fs/xfs/xfs_stats.h).
> I have tried several approaches and this one ended up being the
> cleanest and smallest patch.
> 
> The cleaner way would be an actual percpu_counter array and
> convert all callers to use enum index to array like the dqstats counters
> (include/linux/quota.h), but IMO current patch is enough.

I'd prefer the "array with enum indices" approach. The current
patch risks breaking C aliasing rules, IMHO -- with undefined
consequences. I don't see a compelling reason we have to take
that risk, however small it might be.

At the very least, you could make the counters array and the
set of counters into a union, but I'd prefer always accessing
the underlying memory using the same high-level language
constructs. That way, humans and compilers agree on what's
going on here.


--
Chuck Lever
Amir Goldstein Jan. 5, 2021, 6:29 a.m. UTC | #6
On Tue, Jan 5, 2021 at 4:12 AM Chuck Lever <chuck.lever@oracle.com> wrote:
>
>
>
> > On Jan 4, 2021, at 5:22 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Mon, Jan 4, 2021 at 11:55 PM J . Bruce Fields <bfields@fieldses.org> wrote:
> >>
> >> Thanks for looking at this, it's long overdue!
> >>
> >> On Mon, Dec 28, 2020 at 07:03:43PM +0200, Amir Goldstein wrote:
> >>> nfsd stats counters can be updated by concurrent nfsd threads without any
> >>> protection.
> >>>
> >>> Convert some nfsd_stats and nfsd_net struct members to use percpu counters.
> >>>
> >>> There are several members of struct nfsd_stats that are reported in file
> >>> /proc/net/rpc/nfsd by never updated. Those have been left untouched.
> >>
> >> Looking through the history, the code that updated fh_lookup, for
> >> example, was removed in 2002.
> >>
> >> I'd be OK with removing those entirely, maybe just leave a /* deprecated
> >> field */ comment where we printk the hard-coded 0's.  If somebody wants
> >> to know more they can still find the answers in git.
> >>
> >
> > Sure. I can send a followup patch.
> >
> >>> The longest_chain* members of struct nfsd_net remain unprotected.
> >>>
> >>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> >>> ---
> >>> fs/nfsd/netns.h    | 20 +++++++----
> >>> fs/nfsd/nfs4proc.c |  2 +-
> >>> fs/nfsd/nfscache.c | 52 +++++++++++++++++++--------
> >>> fs/nfsd/nfsctl.c   |  5 ++-
> >>> fs/nfsd/nfsfh.c    |  2 +-
> >>> fs/nfsd/stats.c    | 87 ++++++++++++++++++++++++++++++++++++----------
> >>> fs/nfsd/stats.h    | 42 +++++++++++++++-------
> >>> fs/nfsd/vfs.c      |  4 +--
> >>> 8 files changed, 156 insertions(+), 58 deletions(-)
> >>>
> >>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> >>> index 7346acda9d76..080c5389b2e7 100644
> >>> --- a/fs/nfsd/netns.h
> >>> +++ b/fs/nfsd/netns.h
> >>> @@ -10,6 +10,7 @@
> >>>
> >>> #include <net/net_namespace.h>
> >>> #include <net/netns/generic.h>
> >>> +#include <linux/percpu_counter.h>
> >>>
> >>> /* Hash tables for nfs4_clientid state */
> >>> #define CLIENT_HASH_BITS                 4
> >>> @@ -149,20 +150,25 @@ struct nfsd_net {
> >>>
> >>>      /*
> >>>       * Stats and other tracking of on the duplicate reply cache.
> >>> -      * These fields and the "rc" fields in nfsdstats are modified
> >>> -      * with only the per-bucket cache lock, which isn't really safe
> >>> -      * and should be fixed if we want the statistics to be
> >>> -      * completely accurate.
> >>> +      * The longest_chain* fields are modified with only the per-bucket
> >>> +      * cache lock, which isn't really safe and should be fixed if we want
> >>> +      * these statistics to be completely accurate.
> >>>       */
> >>>
> >>>      /* total number of entries */
> >>>      atomic_t                 num_drc_entries;
> >>>
> >>> +     /* Reference to below counters as array for init/destroy */
> >>> +     struct percpu_counter    counters[0];
> >>
> >> This feels slightly too clever for its own good, but....  OK, I see
> >> there's a bunch of initializations to do in the nfsdstats case, and you
> >> don't want to open code all that (and its error handling).  I guess I
> >
> > Yeh, look at ceph_metric_init() and imagine what nfsdstats init
> > would look like.
> >
> >> don't have a better idea.  Is this a common pattern elsewhere?
> >>
> >
> > Sort of. Inspired by xfsstats and related macros (fs/xfs/xfs_stats.h).
> > I have tried several approaches and this one ended up being the
> > cleanest and smallest patch.
> >
> > The cleaner way would be an actual percpu_counter array and
> > convert all callers to use enum index to array like the dqstats counters
> > (include/linux/quota.h), but IMO current patch is enough.
>
> I'd prefer the "array with enum indices" approach. The current
> patch risks breaking C aliasing rules, IMHO -- with undefined
> consequences. I don't see a compelling reason we have to take
> that risk, however small it might be.
>
> At the very least, you could make the counters array and the
> set of counters into a union, but I'd prefer always accessing
> the underlying memory using the same high-level language
> constructs. That way, humans and compilers agree on what's
> going on here.
>

No problem. I just cannot resist a good shortcut when I see one ;-)
enum indices it shall be.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 7346acda9d76..080c5389b2e7 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -10,6 +10,7 @@ 
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <linux/percpu_counter.h>
 
 /* Hash tables for nfs4_clientid state */
 #define CLIENT_HASH_BITS                 4
@@ -149,20 +150,25 @@  struct nfsd_net {
 
 	/*
 	 * Stats and other tracking of on the duplicate reply cache.
-	 * These fields and the "rc" fields in nfsdstats are modified
-	 * with only the per-bucket cache lock, which isn't really safe
-	 * and should be fixed if we want the statistics to be
-	 * completely accurate.
+	 * The longest_chain* fields are modified with only the per-bucket
+	 * cache lock, which isn't really safe and should be fixed if we want
+	 * these statistics to be completely accurate.
 	 */
 
 	/* total number of entries */
 	atomic_t                 num_drc_entries;
 
+	/* Reference to below counters as array for init/destroy */
+	struct percpu_counter    counters[0];
 	/* cache misses due only to checksum comparison failures */
-	unsigned int             payload_misses;
-
+	struct percpu_counter    payload_misses;
 	/* amount of memory (in bytes) currently consumed by the DRC */
-	unsigned int             drc_mem_usage;
+	struct percpu_counter    drc_mem_usage;
+	/* End of counters array */
+	struct percpu_counter    counters_end[0];
+#define NFSD_NET_COUNTERS_NUM \
+	((offsetof(struct nfsd_net, counters_end) - \
+	  offsetof(struct nfsd_net, counters)) / sizeof(struct percpu_counter))
 
 	/* longest hash chain seen */
 	unsigned int             longest_chain;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e83b21778816..0fa205d8ce49 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2173,7 +2173,7 @@  nfsd4_proc_null(struct svc_rqst *rqstp)
 static inline void nfsd4_increment_op_stats(u32 opnum)
 {
 	if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
-		nfsdstats.nfs4_opcount[opnum]++;
+		percpu_counter_inc(&nfsdstats.nfs4_opcount[opnum]);
 }
 
 static const struct nfsd4_operation nfsd4_ops[];
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 80c90fc231a5..4093ab25cc4d 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -121,14 +121,14 @@  nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
 				struct nfsd_net *nn)
 {
 	if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
-		nn->drc_mem_usage -= rp->c_replvec.iov_len;
+		percpu_counter_sub(&nn->drc_mem_usage, rp->c_replvec.iov_len);
 		kfree(rp->c_replvec.iov_base);
 	}
 	if (rp->c_state != RC_UNUSED) {
 		rb_erase(&rp->c_node, &b->rb_head);
 		list_del(&rp->c_lru);
 		atomic_dec(&nn->num_drc_entries);
-		nn->drc_mem_usage -= sizeof(*rp);
+		percpu_counter_sub(&nn->drc_mem_usage, sizeof(*rp));
 	}
 	kmem_cache_free(drc_slab, rp);
 }
@@ -154,6 +154,16 @@  void nfsd_drc_slab_free(void)
 	kmem_cache_destroy(drc_slab);
 }
 
+static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
+{
+	return nfsd_percpu_counters_init(nn->counters, NFSD_NET_COUNTERS_NUM);
+}
+
+static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
+{
+	nfsd_percpu_counters_destroy(nn->counters, NFSD_NET_COUNTERS_NUM);
+}
+
 int nfsd_reply_cache_init(struct nfsd_net *nn)
 {
 	unsigned int hashsize;
@@ -165,12 +175,16 @@  int nfsd_reply_cache_init(struct nfsd_net *nn)
 	hashsize = nfsd_hashsize(nn->max_drc_entries);
 	nn->maskbits = ilog2(hashsize);
 
+	status = nfsd_reply_cache_stats_init(nn);
+	if (status)
+		goto out_nomem;
+
 	nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
 	nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
 	nn->nfsd_reply_cache_shrinker.seeks = 1;
 	status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
 	if (status)
-		goto out_nomem;
+		goto out_stats_destroy;
 
 	nn->drc_hashtbl = kvzalloc(array_size(hashsize,
 				sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@@ -186,6 +200,8 @@  int nfsd_reply_cache_init(struct nfsd_net *nn)
 	return 0;
 out_shrinker:
 	unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
+out_stats_destroy:
+	nfsd_reply_cache_stats_destroy(nn);
 out_nomem:
 	printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
 	return -ENOMEM;
@@ -196,6 +212,7 @@  void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
 	struct svc_cacherep	*rp;
 	unsigned int i;
 
+	nfsd_reply_cache_stats_destroy(nn);
 	unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
 
 	for (i = 0; i < nn->drc_hashsize; i++) {
@@ -324,7 +341,7 @@  nfsd_cache_key_cmp(const struct svc_cacherep *key,
 {
 	if (key->c_key.k_xid == rp->c_key.k_xid &&
 	    key->c_key.k_csum != rp->c_key.k_csum) {
-		++nn->payload_misses;
+		percpu_counter_inc(&nn->payload_misses);
 		trace_nfsd_drc_mismatch(nn, key, rp);
 	}
 
@@ -407,7 +424,7 @@  int nfsd_cache_lookup(struct svc_rqst *rqstp)
 
 	rqstp->rq_cacherep = NULL;
 	if (type == RC_NOCACHE) {
-		nfsdstats.rcnocache++;
+		percpu_counter_inc(&nfsdstats.rcnocache);
 		goto out;
 	}
 
@@ -429,12 +446,12 @@  int nfsd_cache_lookup(struct svc_rqst *rqstp)
 		goto found_entry;
 	}
 
-	nfsdstats.rcmisses++;
+	percpu_counter_inc(&nfsdstats.rcmisses);
 	rqstp->rq_cacherep = rp;
 	rp->c_state = RC_INPROG;
 
 	atomic_inc(&nn->num_drc_entries);
-	nn->drc_mem_usage += sizeof(*rp);
+	percpu_counter_add(&nn->drc_mem_usage, sizeof(*rp));
 
 	/* go ahead and prune the cache */
 	prune_bucket(b, nn);
@@ -446,7 +463,7 @@  int nfsd_cache_lookup(struct svc_rqst *rqstp)
 
 found_entry:
 	/* We found a matching entry which is either in progress or done. */
-	nfsdstats.rchits++;
+	percpu_counter_inc(&nfsdstats.rchits);
 	rtn = RC_DROPIT;
 
 	/* Request being processed */
@@ -548,7 +565,7 @@  void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 		return;
 	}
 	spin_lock(&b->cache_lock);
-	nn->drc_mem_usage += bufsize;
+	percpu_counter_add(&nn->drc_mem_usage, bufsize);
 	lru_put_end(b, rp);
 	rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
 	rp->c_type = cachetype;
@@ -588,13 +605,18 @@  static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "max entries:           %u\n", nn->max_drc_entries);
 	seq_printf(m, "num entries:           %u\n",
-			atomic_read(&nn->num_drc_entries));
+		   atomic_read(&nn->num_drc_entries));
 	seq_printf(m, "hash buckets:          %u\n", 1 << nn->maskbits);
-	seq_printf(m, "mem usage:             %u\n", nn->drc_mem_usage);
-	seq_printf(m, "cache hits:            %u\n", nfsdstats.rchits);
-	seq_printf(m, "cache misses:          %u\n", nfsdstats.rcmisses);
-	seq_printf(m, "not cached:            %u\n", nfsdstats.rcnocache);
-	seq_printf(m, "payload misses:        %u\n", nn->payload_misses);
+	seq_printf(m, "mem usage:             %lld\n",
+		   percpu_counter_sum_positive(&nn->drc_mem_usage));
+	seq_printf(m, "cache hits:            %lld\n",
+		   percpu_counter_sum_positive(&nfsdstats.rchits));
+	seq_printf(m, "cache misses:          %lld\n",
+		   percpu_counter_sum_positive(&nfsdstats.rcmisses));
+	seq_printf(m, "not cached:            %lld\n",
+		   percpu_counter_sum_positive(&nfsdstats.rcnocache));
+	seq_printf(m, "payload misses:        %lld\n",
+		   percpu_counter_sum_positive(&nn->payload_misses));
 	seq_printf(m, "longest chain len:     %u\n", nn->longest_chain);
 	seq_printf(m, "cachesize at longest:  %u\n", nn->longest_chain_cachesize);
 	return 0;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f6d5d783f4a4..258605ee49b8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1534,7 +1534,9 @@  static int __init init_nfsd(void)
 	retval = nfsd4_init_pnfs();
 	if (retval)
 		goto out_free_slabs;
-	nfsd_stat_init();	/* Statistics */
+	retval = nfsd_stat_init();	/* Statistics */
+	if (retval)
+		goto out_free_pnfs;
 	retval = nfsd_drc_slab_create();
 	if (retval)
 		goto out_free_stat;
@@ -1554,6 +1556,7 @@  static int __init init_nfsd(void)
 	nfsd_drc_slab_free();
 out_free_stat:
 	nfsd_stat_shutdown();
+out_free_pnfs:
 	nfsd4_exit_pnfs();
 out_free_slabs:
 	nfsd4_free_slabs();
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c81dbbad8792..1879758bbaa5 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -400,7 +400,7 @@  fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
 	}
 out:
 	if (error == nfserr_stale)
-		nfsdstats.fh_stale++;
+		percpu_counter_inc(&nfsdstats.fh_stale);
 	return error;
 }
 
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index b1bc582b0493..7bef1e7139d7 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -38,17 +38,17 @@  static int nfsd_proc_show(struct seq_file *seq, void *v)
 {
 	int i;
 
-	seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
-		      nfsdstats.rchits,
-		      nfsdstats.rcmisses,
-		      nfsdstats.rcnocache,
-		      nfsdstats.fh_stale,
-		      nfsdstats.fh_lookup,
-		      nfsdstats.fh_anon,
-		      nfsdstats.fh_nocache_dir,
-		      nfsdstats.fh_nocache_nondir,
-		      nfsdstats.io_read,
-		      nfsdstats.io_write);
+	seq_printf(seq, "rc %lld %lld %lld\nfh %lld %u %u %u %u\nio %lld %lld\n",
+		   percpu_counter_sum_positive(&nfsdstats.rchits),
+		   percpu_counter_sum_positive(&nfsdstats.rcmisses),
+		   percpu_counter_sum_positive(&nfsdstats.rcnocache),
+		   percpu_counter_sum_positive(&nfsdstats.fh_stale),
+		   nfsdstats.fh_lookup,
+		   nfsdstats.fh_anon,
+		   nfsdstats.fh_nocache_dir,
+		   nfsdstats.fh_nocache_nondir,
+		   percpu_counter_sum_positive(&nfsdstats.io_read),
+		   percpu_counter_sum_positive(&nfsdstats.io_write));
 	/* thread usage: */
 	seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
 	for (i=0; i<10; i++) {
@@ -62,7 +62,7 @@  static int nfsd_proc_show(struct seq_file *seq, void *v)
 	for (i=0; i<11; i++)
 		seq_printf(seq, " %u", nfsdstats.ra_depth[i]);
 	seq_putc(seq, '\n');
-	
+
 	/* show my rpc info */
 	svc_seq_show(seq, &nfsd_svcstats);
 
@@ -70,8 +70,10 @@  static int nfsd_proc_show(struct seq_file *seq, void *v)
 	/* Show count for individual nfsv4 operations */
 	/* Writing operation numbers 0 1 2 also for maintaining uniformity */
 	seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
-	for (i = 0; i <= LAST_NFS4_OP; i++)
-		seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]);
+	for (i = 0; i <= LAST_NFS4_OP; i++) {
+		seq_printf(seq, " %lld",
+			   percpu_counter_sum_positive(&nfsdstats.nfs4_opcount[i]));
+	}
 
 	seq_putc(seq, '\n');
 #endif
@@ -91,14 +93,63 @@  static const struct proc_ops nfsd_proc_ops = {
 	.proc_release	= single_release,
 };
 
-void
-nfsd_stat_init(void)
+int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
 {
+	int i, err = 0;
+
+	for (i = 0; !err && i < num; i++)
+		err = percpu_counter_init(&counters[i], 0, GFP_KERNEL);
+
+	if (!err)
+		return 0;
+
+	for (; i > 0; i--)
+		percpu_counter_destroy(&counters[i-1]);
+
+	return err;
+}
+
+void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num)
+{
+	int i;
+
+	for (i = 0; i < num; i++)
+		percpu_counter_set(&counters[i], 0);
+}
+
+void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
+{
+	int i;
+
+	for (i = 0; i < num; i++)
+		percpu_counter_destroy(&counters[i]);
+}
+
+static int nfsd_stat_counters_init(void)
+{
+	return nfsd_percpu_counters_init(nfsdstats.counters, NFSD_STATS_COUNTERS_NUM);
+}
+
+static void nfsd_stat_counters_destroy(void)
+{
+	nfsd_percpu_counters_destroy(nfsdstats.counters, NFSD_STATS_COUNTERS_NUM);
+}
+
+int nfsd_stat_init(void)
+{
+	int err;
+
+	err = nfsd_stat_counters_init();
+	if (err)
+		return err;
+
 	svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
+
+	return 0;
 }
 
-void
-nfsd_stat_shutdown(void)
+void nfsd_stat_shutdown(void)
 {
+	nfsd_stat_counters_destroy();
 	svc_proc_unregister(&init_net, "nfsd");
 }
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index b23fdac69820..ad52a916375e 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -8,37 +8,53 @@ 
 #define _NFSD_STATS_H
 
 #include <uapi/linux/nfsd/stats.h>
+#include <linux/percpu_counter.h>
 
 
 struct nfsd_stats {
-	unsigned int	rchits;		/* repcache hits */
-	unsigned int	rcmisses;	/* repcache hits */
-	unsigned int	rcnocache;	/* uncached reqs */
-	unsigned int	fh_stale;	/* FH stale error */
+	/* Reference to below counters as array for init/destroy */
+	struct percpu_counter	counters[0];
+	struct percpu_counter   rchits;         /* repcache hits */
+	struct percpu_counter   rcmisses;       /* repcache hits */
+	struct percpu_counter   rcnocache;      /* uncached reqs */
+	struct percpu_counter   fh_stale;       /* FH stale error */
+	struct percpu_counter   io_read;        /* bytes returned to read requests */
+	struct percpu_counter   io_write;       /* bytes passed in write requests */
+#ifdef CONFIG_NFSD_V4
+	/* Counters of individual nfsv4 operations */
+	struct percpu_counter	nfs4_opcount[LAST_NFS4_OP + 1];
+#endif
+	/* End of array of couters */
+	struct percpu_counter	counters_end[0];
+#define NFSD_STATS_COUNTERS_NUM \
+	((offsetof(struct nfsd_stats, counters_end) - \
+	  offsetof(struct nfsd_stats, counters)) / sizeof(struct percpu_counter))
+
+	/* Protected by nfsd_mutex */
+	unsigned int	th_cnt;		/* number of available threads */
+
+	/* Not updated at all?? */
 	unsigned int	fh_lookup;	/* dentry cached */
 	unsigned int	fh_anon;	/* anon file dentry returned */
 	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
 	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
-	unsigned int	io_read;	/* bytes returned to read requests */
-	unsigned int	io_write;	/* bytes passed in write requests */
-	unsigned int	th_cnt;		/* number of available threads */
 	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
 					 * of available threads were in use */
 	unsigned int	th_fullcnt;	/* number of times last free thread was used */
 	unsigned int	ra_size;	/* size of ra cache */
 	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
 					 * in the cache (10percentiles). [10] = not found */
-#ifdef CONFIG_NFSD_V4
-	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
-#endif
-
 };
 
 
 extern struct nfsd_stats	nfsdstats;
+
 extern struct svc_stat		nfsd_svcstats;
 
-void	nfsd_stat_init(void);
-void	nfsd_stat_shutdown(void);
+int nfsd_percpu_counters_init(struct percpu_counter counters[], int num);
+void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num);
+void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num);
+int nfsd_stat_init(void);
+void nfsd_stat_shutdown(void);
 
 #endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1ecaceebee13..6adb7aba2575 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -889,7 +889,7 @@  static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			       unsigned long *count, u32 *eof, ssize_t host_err)
 {
 	if (host_err >= 0) {
-		nfsdstats.io_read += host_err;
+		percpu_counter_add(&nfsdstats.io_read, host_err);
 		*eof = nfsd_eof_on_read(file, offset, host_err, *count);
 		*count = host_err;
 		fsnotify_access(file);
@@ -1031,7 +1031,7 @@  nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 		goto out_nfserr;
 	}
 	*cnt = host_err;
-	nfsdstats.io_write += *cnt;
+	percpu_counter_add(&nfsdstats.io_write, *cnt);
 	fsnotify_modify(file);
 
 	if (stable && use_wgather) {