[v3,2/8] ceph: add caps perf metric for each session
diff mbox series

Message ID 20200115034444.14304-3-xiubli@redhat.com
State New
Headers show
Series
  • ceph: add perf metrics support
Related show

Commit Message

Xiubo Li Jan. 15, 2020, 3:44 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

This adds the caps hit/miss metric for each session. When checking the
"need" mask, if a cap has a subset of the "need" mask it counts as a
hit; otherwise it counts as a miss.

item          total           miss            hit
-------------------------------------------------
d_lease       295             0               993

session       caps            miss            hit
-------------------------------------------------
0             295             107             4119
1             1               107             9

URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/acl.c        |  2 ++
 fs/ceph/addr.c       |  1 +
 fs/ceph/caps.c       | 71 ++++++++++++++++++++++++++++++++++++++++++++
 fs/ceph/debugfs.c    | 20 +++++++++++++
 fs/ceph/dir.c        |  4 +++
 fs/ceph/file.c       |  4 ++-
 fs/ceph/mds_client.c | 16 +++++++++-
 fs/ceph/mds_client.h |  3 ++
 fs/ceph/quota.c      |  8 +++--
 fs/ceph/super.h      |  6 ++++
 fs/ceph/xattr.c      | 17 +++++++++--
 11 files changed, 145 insertions(+), 7 deletions(-)

Comments

Jeff Layton Jan. 15, 2020, 2:24 p.m. UTC | #1
On Tue, 2020-01-14 at 22:44 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> This will fulfill the caps hit/miss metric for each session. When
> checking the "need" mask and if one cap has the subset of the "need"
> mask it means hit, or missed.
> 
> item          total           miss            hit
> -------------------------------------------------
> d_lease       295             0               993
> 
> session       caps            miss            hit
> -------------------------------------------------
> 0             295             107             4119
> 1             1               107             9
> 
> URL: https://tracker.ceph.com/issues/43215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/acl.c        |  2 ++
>  fs/ceph/addr.c       |  1 +
>  fs/ceph/caps.c       | 71 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/ceph/debugfs.c    | 20 +++++++++++++
>  fs/ceph/dir.c        |  4 +++
>  fs/ceph/file.c       |  4 ++-
>  fs/ceph/mds_client.c | 16 +++++++++-
>  fs/ceph/mds_client.h |  3 ++
>  fs/ceph/quota.c      |  8 +++--
>  fs/ceph/super.h      |  6 ++++
>  fs/ceph/xattr.c      | 17 +++++++++--
>  11 files changed, 145 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
> index 26be6520d3fb..58e119e3519f 100644
> --- a/fs/ceph/acl.c
> +++ b/fs/ceph/acl.c
> @@ -22,6 +22,8 @@ static inline void ceph_set_cached_acl(struct inode *inode,
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  
>  	spin_lock(&ci->i_ceph_lock);
> +	__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
> +
>  	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
>  		set_cached_acl(inode, type, acl);
>  	else
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 7ab616601141..fe8adf3dc065 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1706,6 +1706,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
>  			err = -ENOMEM;
>  			goto out;
>  		}
> +		__ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
>  		err = __ceph_do_getattr(inode, page,
>  					CEPH_STAT_CAP_INLINE_DATA, true);
>  		if (err < 0) {
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 7fc87b693ba4..df85980f0930 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -783,6 +783,73 @@ static int __cap_is_valid(struct ceph_cap *cap)
>  	return 1;
>  }
>  
> +/*
> + * Counts the cap metric.
> + */
> +void __ceph_caps_metric(struct ceph_inode_info *ci, int mask)
> +{
> +	int have = ci->i_snap_caps;
> +	struct ceph_mds_session *s;
> +	struct ceph_cap *cap;
> +	struct rb_node *p;
> +	bool skip_auth = false;
> +
> +	if (mask <= 0)
> +		return;
> +
> +	/* Counts the snap caps metric in the auth cap */
> +	if (ci->i_auth_cap) {
> +		cap = ci->i_auth_cap;
> +		if (have) {
> +			have |= cap->issued;
> +
> +			dout("%s %p cap %p issued %s, mask %s\n", __func__,
> +			     &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
> +			     ceph_cap_string(mask));
> +
> +			s = ceph_get_mds_session(cap->session);
> +			if (s) {
> +				if (mask & have)
> +					percpu_counter_inc(&s->i_caps_hit);
> +				else
> +					percpu_counter_inc(&s->i_caps_mis);
> +				ceph_put_mds_session(s);
> +			}
> +			skip_auth = true;
> +		}
> +	}
> +
> +	if ((mask & have) == mask)
> +		return;
> +
> +	/* Checks others */


Iterating over i_caps requires that you hold the i_ceph_lock. Some
callers of __ceph_caps_metric already hold it but some of the callers
don't.

The simple fix would be to wrap this function in another that takes and
drops the i_ceph_lock before calling this one. It would also be good to
add this at the top of this function as well:

	lockdep_assert_held(&ci->i_ceph_lock);

The bad part is that this does mean adding in extra spinlocking to some
of these codepaths, which is less than ideal. Eventually, I think we
ought to convert the cap handling to use RCU and move the i_caps tree to
a linked list. That would allow us to avoid a lot of the locking for
stuff like this, and it never has _that_ many entries to where a tree
really matters.

> +	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
> +		cap = rb_entry(p, struct ceph_cap, ci_node);
> +		if (!__cap_is_valid(cap))
> +			continue;
> +
> +		if (skip_auth && cap == ci->i_auth_cap)
> +			continue;
> +
> +		dout("%s %p cap %p issued %s, mask %s\n", __func__,
> +		     &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
> +		     ceph_cap_string(mask));
> +
> +		s = ceph_get_mds_session(cap->session);
> +		if (s) {
> +			if (mask & cap->issued)
> +				percpu_counter_inc(&s->i_caps_hit);
> +			else
> +				percpu_counter_inc(&s->i_caps_mis);
> +			ceph_put_mds_session(s);
> +		}
> +
> +		have |= cap->issued;
> +		if ((mask & have) == mask)
> +			return;
> +	}
> +}
> +
>  /*
>   * Return set of valid cap bits issued to us.  Note that caps time
>   * out, and may be invalidated in bulk if the client session times out
> @@ -881,6 +948,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
>  		cap = rb_entry(p, struct ceph_cap, ci_node);
>  		if (!__cap_is_valid(cap))
>  			continue;
> +
>  		if ((cap->issued & mask) == mask) {
>  			dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
>  			     " (mask %s)\n", ci->vfs_inode.i_ino, cap,
> @@ -2603,6 +2671,8 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
>  		spin_lock(&ci->i_ceph_lock);
>  	}
>  
> +	__ceph_caps_metric(ci, need);
> +

Should "want" also count toward hits and misses here? IOW:

	__ceph_caps_metric(ci, need | want);

?

>  	have = __ceph_caps_issued(ci, &implemented);
>  
>  	if (have & need & CEPH_CAP_FILE_WR) {
> @@ -2871,6 +2941,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
>  			 * getattr request will bring inline data into
>  			 * page cache
>  			 */
> +			__ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
>  			ret = __ceph_do_getattr(inode, NULL,
>  						CEPH_STAT_CAP_INLINE_DATA,
>  						true);
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 40a22da0214a..c132fdb40d53 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -128,6 +128,7 @@ static int metric_show(struct seq_file *s, void *p)
>  {
>  	struct ceph_fs_client *fsc = s->private;
>  	struct ceph_mds_client *mdsc = fsc->mdsc;
> +	int i;
>  
>  	seq_printf(s, "item          total           miss            hit\n");
>  	seq_printf(s, "-------------------------------------------------\n");
> @@ -137,6 +138,25 @@ static int metric_show(struct seq_file *s, void *p)
>  		   percpu_counter_sum(&mdsc->metric.d_lease_mis),
>  		   percpu_counter_sum(&mdsc->metric.d_lease_hit));
>  
> +	seq_printf(s, "\n");
> +	seq_printf(s, "session       caps            miss            hit\n");
> +	seq_printf(s, "-------------------------------------------------\n");
> +
> +	mutex_lock(&mdsc->mutex);
> +	for (i = 0; i < mdsc->max_sessions; i++) {
> +		struct ceph_mds_session *session;
> +
> +		session = __ceph_lookup_mds_session(mdsc, i);
> +		if (!session)
> +			continue;
> +		seq_printf(s, "%-14d%-16d%-16lld%lld\n", i,
> +			   session->s_nr_caps,
> +			   percpu_counter_sum(&session->i_caps_mis),
> +			   percpu_counter_sum(&session->i_caps_hit));
> +		ceph_put_mds_session(session);
> +	}
> +	mutex_unlock(&mdsc->mutex);
> +
>  	return 0;
>  }
>  
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 658c55b323cc..c381ce430036 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -342,6 +342,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>  
>  	/* can we use the dcache? */
>  	spin_lock(&ci->i_ceph_lock);
> +	__ceph_caps_metric(ci, CEPH_CAP_FILE_SHARED);
> +
>  	if (ceph_test_mount_opt(fsc, DCACHE) &&
>  	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
>  	    ceph_snap(inode) != CEPH_SNAPDIR &&
> @@ -757,6 +759,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
>  		struct ceph_dentry_info *di = ceph_dentry(dentry);
>  
>  		spin_lock(&ci->i_ceph_lock);
> +		__ceph_caps_metric(ci, CEPH_CAP_FILE_SHARED);
> +
>  		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
>  		if (strncmp(dentry->d_name.name,
>  			    fsc->mount_options->snapdir_name,
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 1e6cdf2dfe90..b32aba4023b3 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -393,6 +393,7 @@ int ceph_open(struct inode *inode, struct file *file)
>  		     inode, fmode, ceph_cap_string(wanted),
>  		     ceph_cap_string(issued));
>  		__ceph_get_fmode(ci, fmode);
> +		__ceph_caps_metric(ci, fmode);

This looks wrong. fmode is not a cap mask.

>  		spin_unlock(&ci->i_ceph_lock);
>  
>  		/* adjust wanted? */
> @@ -403,7 +404,7 @@ int ceph_open(struct inode *inode, struct file *file)
>  
>  		return ceph_init_file(inode, file, fmode);
>  	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
> -		   (ci->i_snap_caps & wanted) == wanted) {
> +			(ci->i_snap_caps & wanted) == wanted) {
>  		__ceph_get_fmode(ci, fmode);
>  		spin_unlock(&ci->i_ceph_lock);
>  		return ceph_init_file(inode, file, fmode);
> @@ -1340,6 +1341,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  				return -ENOMEM;
>  		}
>  
> +		__ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
>  		statret = __ceph_do_getattr(inode, page,
>  					    CEPH_STAT_CAP_INLINE_DATA, !!page);
>  		if (statret < 0) {
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index a24fd00676b8..141c1c03636c 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -558,6 +558,8 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
>  	if (refcount_dec_and_test(&s->s_ref)) {
>  		if (s->s_auth.authorizer)
>  			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
> +		percpu_counter_destroy(&s->i_caps_hit);
> +		percpu_counter_destroy(&s->i_caps_mis);
>  		kfree(s);
>  	}
>  }
> @@ -598,6 +600,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>  						 int mds)
>  {
>  	struct ceph_mds_session *s;
> +	int err;
>  
>  	if (mds >= mdsc->mdsmap->possible_max_rank)
>  		return ERR_PTR(-EINVAL);
> @@ -612,8 +615,10 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>  
>  		dout("%s: realloc to %d\n", __func__, newmax);
>  		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
> -		if (!sa)
> +		if (!sa) {
> +			err = -ENOMEM;
>  			goto fail_realloc;
> +		}
>  		if (mdsc->sessions) {
>  			memcpy(sa, mdsc->sessions,
>  			       mdsc->max_sessions * sizeof(void *));
> @@ -653,6 +658,13 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>  
>  	INIT_LIST_HEAD(&s->s_cap_flushing);
>  
> +	err = percpu_counter_init(&s->i_caps_hit, 0, GFP_NOFS);
> +	if (err)
> +		goto fail_realloc;
> +	err = percpu_counter_init(&s->i_caps_mis, 0, GFP_NOFS);
> +	if (err)
> +		goto fail_init;
> +
>  	mdsc->sessions[mds] = s;
>  	atomic_inc(&mdsc->num_sessions);
>  	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
> @@ -662,6 +674,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>  
>  	return s;
>  
> +fail_init:
> +	percpu_counter_destroy(&s->i_caps_hit);
>  fail_realloc:
>  	kfree(s);
>  	return ERR_PTR(-ENOMEM);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 7c839a1183e5..7645cecf7fb0 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -201,6 +201,9 @@ struct ceph_mds_session {
>  
>  	struct list_head  s_waiting;  /* waiting requests */
>  	struct list_head  s_unsafe;   /* unsafe requests */
> +
> +	struct percpu_counter i_caps_hit;
> +	struct percpu_counter i_caps_mis;
>  };
>  
>  /*
> diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
> index de56dee60540..7b248f698100 100644
> --- a/fs/ceph/quota.c
> +++ b/fs/ceph/quota.c
> @@ -147,9 +147,13 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
>  		return NULL;
>  	}
>  	if (qri->inode) {
> +		int ret;
> +
> +		__ceph_caps_metric(ceph_inode(qri->inode), CEPH_STAT_CAP_INODE);
> +
>  		/* get caps */
> -		int ret = __ceph_do_getattr(qri->inode, NULL,
> -					    CEPH_STAT_CAP_INODE, true);
> +		ret = __ceph_do_getattr(qri->inode, NULL,
> +					CEPH_STAT_CAP_INODE, true);
>  		if (ret >= 0)
>  			in = qri->inode;
>  		else
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 7af91628636c..7a6f9913c8f1 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -642,6 +642,7 @@ static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
>  }
>  
>  extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
> +extern void __ceph_caps_metric(struct ceph_inode_info *ci, int mask);
>  extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
>  extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
>  				    struct ceph_cap *cap);
> @@ -927,6 +928,11 @@ extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
>  			     int mask, bool force);
>  static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
>  {
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +
> +	spin_lock(&ci->i_ceph_lock);
> +	__ceph_caps_metric(ci, mask);
> +	spin_unlock(&ci->i_ceph_lock);
>  	return __ceph_do_getattr(inode, NULL, mask, force);
>  }
>  extern int ceph_permission(struct inode *inode, int mask);
> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> index 98a9a3101cda..f3b1149ff7c5 100644
> --- a/fs/ceph/xattr.c
> +++ b/fs/ceph/xattr.c
> @@ -829,6 +829,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
>  	struct ceph_vxattr *vxattr = NULL;
>  	int req_mask;
>  	ssize_t err;
> +	int ret = -1;
>  
>  	/* let's see if a virtual xattr was requested */
>  	vxattr = ceph_match_vxattr(inode, name);
> @@ -856,7 +857,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
>  
>  	if (ci->i_xattrs.version == 0 ||
>  	    !((req_mask & CEPH_CAP_XATTR_SHARED) ||
> -	      __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
> +	      (ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)))) {
> +		if (ret != -1)
> +			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
>  		spin_unlock(&ci->i_ceph_lock);
>  
>  		/* security module gets xattr while filling trace */
> @@ -871,6 +874,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
>  		if (err)
>  			return err;
>  		spin_lock(&ci->i_ceph_lock);
> +	} else {
> +		if (ret != -1)
> +			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
>  	}
>  
>  	err = __build_xattrs(inode);
> @@ -907,19 +913,24 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	bool len_only = (size == 0);
>  	u32 namelen;
> -	int err;
> +	int err, ret = -1;
>  
>  	spin_lock(&ci->i_ceph_lock);
>  	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
>  	     ci->i_xattrs.version, ci->i_xattrs.index_version);
>  
>  	if (ci->i_xattrs.version == 0 ||
> -	    !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
> +	    !(ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
> +		if (ret != -1)
> +			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
>  		spin_unlock(&ci->i_ceph_lock);
>  		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
>  		if (err)
>  			return err;
>  		spin_lock(&ci->i_ceph_lock);
> +	} else {
> +		if (ret != -1)
> +			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
>  	}
>  
>  	err = __build_xattrs(inode);
Xiubo Li Jan. 16, 2020, 1:57 a.m. UTC | #2
On 2020/1/15 22:24, Jeff Layton wrote:
> On Tue, 2020-01-14 at 22:44 -0500, xiubli@redhat.com wrote:
[...]
>> +/*
>> + * Counts the cap metric.
>> + */
>> +void __ceph_caps_metric(struct ceph_inode_info *ci, int mask)
>> +{
>> +	int have = ci->i_snap_caps;
>> +	struct ceph_mds_session *s;
>> +	struct ceph_cap *cap;
>> +	struct rb_node *p;
>> +	bool skip_auth = false;
>> +
>> +	if (mask <= 0)
>> +		return;
>> +
>> +	/* Counts the snap caps metric in the auth cap */
>> +	if (ci->i_auth_cap) {
>> +		cap = ci->i_auth_cap;
>> +		if (have) {
>> +			have |= cap->issued;
>> +
>> +			dout("%s %p cap %p issued %s, mask %s\n", __func__,
>> +			     &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
>> +			     ceph_cap_string(mask));
>> +
>> +			s = ceph_get_mds_session(cap->session);
>> +			if (s) {
>> +				if (mask & have)
>> +					percpu_counter_inc(&s->i_caps_hit);
>> +				else
>> +					percpu_counter_inc(&s->i_caps_mis);
>> +				ceph_put_mds_session(s);
>> +			}
>> +			skip_auth = true;
>> +		}
>> +	}
>> +
>> +	if ((mask & have) == mask)
>> +		return;
>> +
>> +	/* Checks others */
>
> Iterating over i_caps requires that you hold the i_ceph_lock. Some
> callers of __ceph_caps_metric already hold it but some of the callers
> don't.
>
> The simple fix would be to wrap this function in another that takes and
> drops the i_ceph_lock before calling this one. It would also be good to
> add this at the top of this function as well:
>
> 	lockdep_assert_held(&ci->i_ceph_lock);

Yeah, let's fix it the simple way for now.


>
> The bad part is that this does mean adding in extra spinlocking to some
> of these codepaths, which is less than ideal. Eventually, I think we
> ought to convert the cap handling to use RCU and move the i_caps tree to
> a linked list. That would allow us to avoid a lot of the locking for
> stuff like this, and it never has _that_ many entries to where a tree
> really matters.
>
>> +	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
>> +		cap = rb_entry(p, struct ceph_cap, ci_node);
>> +		if (!__cap_is_valid(cap))
>> +			continue;
>> +
>> +		if (skip_auth && cap == ci->i_auth_cap)
>> +			continue;
>> +
>> +		dout("%s %p cap %p issued %s, mask %s\n", __func__,

[...]
>> @@ -2603,6 +2671,8 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
>>   		spin_lock(&ci->i_ceph_lock);
>>   	}
>>   
>> +	__ceph_caps_metric(ci, need);
>> +
> Should "want" also count toward hits and misses here? IOW:
>
> 	__ceph_caps_metric(ci, need | want);
>
> ?

Yeah, this makes sense.


>
[...]
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 1e6cdf2dfe90..b32aba4023b3 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -393,6 +393,7 @@ int ceph_open(struct inode *inode, struct file *file)
>>   		     inode, fmode, ceph_cap_string(wanted),
>>   		     ceph_cap_string(issued));
>>   		__ceph_get_fmode(ci, fmode);
>> +		__ceph_caps_metric(ci, fmode);
> This looks wrong. fmode is not a cap mask.
>
It should be "wanted" here.

Thanks

Patch
diff mbox series

diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 26be6520d3fb..58e119e3519f 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -22,6 +22,8 @@  static inline void ceph_set_cached_acl(struct inode *inode,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
 	spin_lock(&ci->i_ceph_lock);
+	__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
+
 	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
 		set_cached_acl(inode, type, acl);
 	else
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7ab616601141..fe8adf3dc065 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1706,6 +1706,7 @@  int ceph_uninline_data(struct file *filp, struct page *locked_page)
 			err = -ENOMEM;
 			goto out;
 		}
+		__ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
 		err = __ceph_do_getattr(inode, page,
 					CEPH_STAT_CAP_INLINE_DATA, true);
 		if (err < 0) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7fc87b693ba4..df85980f0930 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -783,6 +783,73 @@  static int __cap_is_valid(struct ceph_cap *cap)
 	return 1;
 }
 
+/*
+ * Counts the cap metric.
+ */
+void __ceph_caps_metric(struct ceph_inode_info *ci, int mask)
+{
+	int have = ci->i_snap_caps;
+	struct ceph_mds_session *s;
+	struct ceph_cap *cap;
+	struct rb_node *p;
+	bool skip_auth = false;
+
+	if (mask <= 0)
+		return;
+
+	/* Counts the snap caps metric in the auth cap */
+	if (ci->i_auth_cap) {
+		cap = ci->i_auth_cap;
+		if (have) {
+			have |= cap->issued;
+
+			dout("%s %p cap %p issued %s, mask %s\n", __func__,
+			     &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
+			     ceph_cap_string(mask));
+
+			s = ceph_get_mds_session(cap->session);
+			if (s) {
+				if (mask & have)
+					percpu_counter_inc(&s->i_caps_hit);
+				else
+					percpu_counter_inc(&s->i_caps_mis);
+				ceph_put_mds_session(s);
+			}
+			skip_auth = true;
+		}
+	}
+
+	if ((mask & have) == mask)
+		return;
+
+	/* Checks others */
+	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+		cap = rb_entry(p, struct ceph_cap, ci_node);
+		if (!__cap_is_valid(cap))
+			continue;
+
+		if (skip_auth && cap == ci->i_auth_cap)
+			continue;
+
+		dout("%s %p cap %p issued %s, mask %s\n", __func__,
+		     &ci->vfs_inode, cap, ceph_cap_string(cap->issued),
+		     ceph_cap_string(mask));
+
+		s = ceph_get_mds_session(cap->session);
+		if (s) {
+			if (mask & cap->issued)
+				percpu_counter_inc(&s->i_caps_hit);
+			else
+				percpu_counter_inc(&s->i_caps_mis);
+			ceph_put_mds_session(s);
+		}
+
+		have |= cap->issued;
+		if ((mask & have) == mask)
+			return;
+	}
+}
+
 /*
  * Return set of valid cap bits issued to us.  Note that caps time
  * out, and may be invalidated in bulk if the client session times out
@@ -881,6 +948,7 @@  int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
 		cap = rb_entry(p, struct ceph_cap, ci_node);
 		if (!__cap_is_valid(cap))
 			continue;
+
 		if ((cap->issued & mask) == mask) {
 			dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
 			     " (mask %s)\n", ci->vfs_inode.i_ino, cap,
@@ -2603,6 +2671,8 @@  static int try_get_cap_refs(struct inode *inode, int need, int want,
 		spin_lock(&ci->i_ceph_lock);
 	}
 
+	__ceph_caps_metric(ci, need);
+
 	have = __ceph_caps_issued(ci, &implemented);
 
 	if (have & need & CEPH_CAP_FILE_WR) {
@@ -2871,6 +2941,7 @@  int ceph_get_caps(struct file *filp, int need, int want,
 			 * getattr request will bring inline data into
 			 * page cache
 			 */
+			__ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
 			ret = __ceph_do_getattr(inode, NULL,
 						CEPH_STAT_CAP_INLINE_DATA,
 						true);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 40a22da0214a..c132fdb40d53 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -128,6 +128,7 @@  static int metric_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
 	struct ceph_mds_client *mdsc = fsc->mdsc;
+	int i;
 
 	seq_printf(s, "item          total           miss            hit\n");
 	seq_printf(s, "-------------------------------------------------\n");
@@ -137,6 +138,25 @@  static int metric_show(struct seq_file *s, void *p)
 		   percpu_counter_sum(&mdsc->metric.d_lease_mis),
 		   percpu_counter_sum(&mdsc->metric.d_lease_hit));
 
+	seq_printf(s, "\n");
+	seq_printf(s, "session       caps            miss            hit\n");
+	seq_printf(s, "-------------------------------------------------\n");
+
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		struct ceph_mds_session *session;
+
+		session = __ceph_lookup_mds_session(mdsc, i);
+		if (!session)
+			continue;
+		seq_printf(s, "%-14d%-16d%-16lld%lld\n", i,
+			   session->s_nr_caps,
+			   percpu_counter_sum(&session->i_caps_mis),
+			   percpu_counter_sum(&session->i_caps_hit));
+		ceph_put_mds_session(session);
+	}
+	mutex_unlock(&mdsc->mutex);
+
 	return 0;
 }
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 658c55b323cc..c381ce430036 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -342,6 +342,8 @@  static int ceph_readdir(struct file *file, struct dir_context *ctx)
 
 	/* can we use the dcache? */
 	spin_lock(&ci->i_ceph_lock);
+	__ceph_caps_metric(ci, CEPH_CAP_FILE_SHARED);
+
 	if (ceph_test_mount_opt(fsc, DCACHE) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
@@ -757,6 +759,8 @@  static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 		struct ceph_dentry_info *di = ceph_dentry(dentry);
 
 		spin_lock(&ci->i_ceph_lock);
+		__ceph_caps_metric(ci, CEPH_CAP_FILE_SHARED);
+
 		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
 		if (strncmp(dentry->d_name.name,
 			    fsc->mount_options->snapdir_name,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 1e6cdf2dfe90..b32aba4023b3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -393,6 +393,7 @@  int ceph_open(struct inode *inode, struct file *file)
 		     inode, fmode, ceph_cap_string(wanted),
 		     ceph_cap_string(issued));
 		__ceph_get_fmode(ci, fmode);
+		__ceph_caps_metric(ci, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 
 		/* adjust wanted? */
@@ -403,7 +404,7 @@  int ceph_open(struct inode *inode, struct file *file)
 
 		return ceph_init_file(inode, file, fmode);
 	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
-		   (ci->i_snap_caps & wanted) == wanted) {
+			(ci->i_snap_caps & wanted) == wanted) {
 		__ceph_get_fmode(ci, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
@@ -1340,6 +1341,7 @@  static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 				return -ENOMEM;
 		}
 
+		__ceph_caps_metric(ci, CEPH_STAT_CAP_INLINE_DATA);
 		statret = __ceph_do_getattr(inode, page,
 					    CEPH_STAT_CAP_INLINE_DATA, !!page);
 		if (statret < 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a24fd00676b8..141c1c03636c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -558,6 +558,8 @@  void ceph_put_mds_session(struct ceph_mds_session *s)
 	if (refcount_dec_and_test(&s->s_ref)) {
 		if (s->s_auth.authorizer)
 			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
+		percpu_counter_destroy(&s->i_caps_hit);
+		percpu_counter_destroy(&s->i_caps_mis);
 		kfree(s);
 	}
 }
@@ -598,6 +600,7 @@  static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 						 int mds)
 {
 	struct ceph_mds_session *s;
+	int err;
 
 	if (mds >= mdsc->mdsmap->possible_max_rank)
 		return ERR_PTR(-EINVAL);
@@ -612,8 +615,10 @@  static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 
 		dout("%s: realloc to %d\n", __func__, newmax);
 		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
-		if (!sa)
+		if (!sa) {
+			err = -ENOMEM;
 			goto fail_realloc;
+		}
 		if (mdsc->sessions) {
 			memcpy(sa, mdsc->sessions,
 			       mdsc->max_sessions * sizeof(void *));
@@ -653,6 +658,13 @@  static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 
 	INIT_LIST_HEAD(&s->s_cap_flushing);
 
+	err = percpu_counter_init(&s->i_caps_hit, 0, GFP_NOFS);
+	if (err)
+		goto fail_realloc;
+	err = percpu_counter_init(&s->i_caps_mis, 0, GFP_NOFS);
+	if (err)
+		goto fail_init;
+
 	mdsc->sessions[mds] = s;
 	atomic_inc(&mdsc->num_sessions);
 	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
@@ -662,6 +674,8 @@  static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 
 	return s;
 
+fail_init:
+	percpu_counter_destroy(&s->i_caps_hit);
 fail_realloc:
 	kfree(s);
 	return ERR_PTR(-ENOMEM);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 7c839a1183e5..7645cecf7fb0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -201,6 +201,9 @@  struct ceph_mds_session {
 
 	struct list_head  s_waiting;  /* waiting requests */
 	struct list_head  s_unsafe;   /* unsafe requests */
+
+	struct percpu_counter i_caps_hit;
+	struct percpu_counter i_caps_mis;
 };
 
 /*
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index de56dee60540..7b248f698100 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -147,9 +147,13 @@  static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 		return NULL;
 	}
 	if (qri->inode) {
+		int ret;
+
+		__ceph_caps_metric(ceph_inode(qri->inode), CEPH_STAT_CAP_INODE);
+
 		/* get caps */
-		int ret = __ceph_do_getattr(qri->inode, NULL,
-					    CEPH_STAT_CAP_INODE, true);
+		ret = __ceph_do_getattr(qri->inode, NULL,
+					CEPH_STAT_CAP_INODE, true);
 		if (ret >= 0)
 			in = qri->inode;
 		else
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7af91628636c..7a6f9913c8f1 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -642,6 +642,7 @@  static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
 }
 
 extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
+extern void __ceph_caps_metric(struct ceph_inode_info *ci, int mask);
 extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
 extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
 				    struct ceph_cap *cap);
@@ -927,6 +928,11 @@  extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
 			     int mask, bool force);
 static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
 {
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	spin_lock(&ci->i_ceph_lock);
+	__ceph_caps_metric(ci, mask);
+	spin_unlock(&ci->i_ceph_lock);
 	return __ceph_do_getattr(inode, NULL, mask, force);
 }
 extern int ceph_permission(struct inode *inode, int mask);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 98a9a3101cda..f3b1149ff7c5 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -829,6 +829,7 @@  ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 	struct ceph_vxattr *vxattr = NULL;
 	int req_mask;
 	ssize_t err;
+	int ret = -1;
 
 	/* let's see if a virtual xattr was requested */
 	vxattr = ceph_match_vxattr(inode, name);
@@ -856,7 +857,9 @@  ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 
 	if (ci->i_xattrs.version == 0 ||
 	    !((req_mask & CEPH_CAP_XATTR_SHARED) ||
-	      __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
+	      (ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)))) {
+		if (ret != -1)
+			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
 		spin_unlock(&ci->i_ceph_lock);
 
 		/* security module gets xattr while filling trace */
@@ -871,6 +874,9 @@  ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 		if (err)
 			return err;
 		spin_lock(&ci->i_ceph_lock);
+	} else {
+		if (ret != -1)
+			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
 	}
 
 	err = __build_xattrs(inode);
@@ -907,19 +913,24 @@  ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	bool len_only = (size == 0);
 	u32 namelen;
-	int err;
+	int err, ret = -1;
 
 	spin_lock(&ci->i_ceph_lock);
 	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
 
 	if (ci->i_xattrs.version == 0 ||
-	    !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
+	    !(ret = __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
+		if (ret != -1)
+			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
 		spin_unlock(&ci->i_ceph_lock);
 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 		if (err)
 			return err;
 		spin_lock(&ci->i_ceph_lock);
+	} else {
+		if (ret != -1)
+			__ceph_caps_metric(ci, CEPH_CAP_XATTR_SHARED);
 	}
 
 	err = __build_xattrs(inode);