diff mbox series

[v2,6/8] ceph: periodically send perf metrics to MDS

Message ID 20200108104152.28468-7-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: add perf metrics support | expand

Commit Message

Xiubo Li Jan. 8, 2020, 10:41 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Add enable/disable sending metrics to MDS debugfs and disabled as
default, if it's enabled the kclient will send metrics every
second.

This will send global dentry lease hit/miss and read/write/metadata
latency metrics and each session's caps hit/miss metric to MDS.

Every time only sends the global metrics once via any availible
session.

Fixes: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/debugfs.c            |  44 +++++++-
 fs/ceph/mds_client.c         | 205 ++++++++++++++++++++++++++++++++---
 fs/ceph/mds_client.h         |   3 +
 fs/ceph/super.h              |   1 +
 include/linux/ceph/ceph_fs.h |  77 +++++++++++++
 5 files changed, 312 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index df8c1cc685d9..bb96fb4d04c4 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,40 @@  static int mdsc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+/*
+ * metrics debugfs
+ */
+static int sending_metrics_set(void *data, u64 val)
+{
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	if (val > 1) {
+		pr_err("Invalid sending metrics set value %llu\n", val);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mdsc->mutex);
+	mdsc->sending_metrics = (unsigned int)val;
+	mutex_unlock(&mdsc->mutex);
+
+	return 0;
+}
+
+static int sending_metrics_get(void *data, u64 *val)
+{
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	mutex_lock(&mdsc->mutex);
+	*val = (u64)mdsc->sending_metrics;
+	mutex_unlock(&mdsc->mutex);
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
+			sending_metrics_set, "%llu\n");
+
 static int metric_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
@@ -308,11 +342,9 @@  static int congestion_kb_get(void *data, u64 *val)
 	*val = (u64)fsc->mount_options->congestion_kb;
 	return 0;
 }
-
 DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
 			congestion_kb_set, "%llu\n");
 
-
 void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
 	dout("ceph_fs_debugfs_cleanup\n");
@@ -322,6 +354,7 @@  void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_mds_sessions);
 	debugfs_remove(fsc->debugfs_caps);
 	debugfs_remove(fsc->debugfs_metric);
+	debugfs_remove(fsc->debugfs_sending_metrics);
 	debugfs_remove(fsc->debugfs_mdsc);
 }
 
@@ -362,6 +395,13 @@  void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						fsc,
 						&mdsc_show_fops);
 
+	fsc->debugfs_sending_metrics =
+			debugfs_create_file("sending_metrics",
+					    0600,
+					    fsc->client->debugfs_dir,
+					    fsc,
+					    &sending_metrics_fops);
+
 	fsc->debugfs_metric = debugfs_create_file("metrics",
 						  0400,
 						  fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ae2fe0277c6c..a0693ed6f54f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4148,13 +4148,162 @@  void ceph_mdsc_update_metadata_latency(struct ceph_client_metric *m,
 	spin_unlock(&m->metadata_lock);
 }
 
+/*
+ * called under s_mutex
+ */
+static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
+				   struct ceph_mds_session *s,
+				   bool skip_global)
+{
+	struct ceph_metric_head *head;
+	struct ceph_metric_cap *cap;
+	struct ceph_metric_dentry_lease *lease;
+	struct ceph_metric_read_latency *read;
+	struct ceph_metric_write_latency *write;
+	struct ceph_metric_metadata_latency *meta;
+	struct ceph_msg *msg;
+	struct timespec64 ts;
+	s32 len = sizeof(*head) + sizeof(*cap);
+	s64 sum, total, avg;
+	s32 items = 0;
+
+	if (!mdsc || !s)
+		return false;
+
+	if (!skip_global) {
+		len += sizeof(*lease);
+		len += sizeof(*read);
+		len += sizeof(*write);
+		len += sizeof(*meta);
+	}
+
+	msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
+	if (!msg) {
+		pr_err("send metrics to mds%d, failed to allocate message\n",
+		       s->s_mds);
+		return false;
+	}
+
+	head = msg->front.iov_base;
+
+	/* encode the cap metric */
+	cap = (struct ceph_metric_cap *)(head + 1);
+	cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
+	cap->ver = 1;
+	cap->campat = 1;
+	cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
+	cap->hit = cpu_to_le64(percpu_counter_sum(&s->i_caps_hit));
+	cap->mis = cpu_to_le64(percpu_counter_sum(&s->i_caps_mis));
+	cap->total = cpu_to_le64(s->s_nr_caps);
+	items++;
+
+	dout("cap metric hit %lld, mis %lld, total caps %lld",
+	     le64_to_cpu(cap->hit), le64_to_cpu(cap->mis),
+	     le64_to_cpu(cap->total));
+
+	/* only send the global once */
+	if (skip_global)
+		goto skip_global;
+
+	/* encode the dentry lease metric */
+	lease = (struct ceph_metric_dentry_lease *)(cap + 1);
+	lease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+	lease->ver = 1;
+	lease->campat = 1;
+	lease->data_len = cpu_to_le32(sizeof(*lease) - 10);
+	lease->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_hit));
+	lease->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.d_lease_mis));
+	lease->total = cpu_to_le64(atomic64_read(&mdsc->metric.total_dentries));
+	items++;
+
+	dout("dentry lease metric hit %lld, mis %lld, total dentries %lld",
+	     le64_to_cpu(lease->hit), le64_to_cpu(lease->mis),
+	     le64_to_cpu(lease->total));
+
+	/* encode the read latency metric */
+	read = (struct ceph_metric_read_latency *)(lease + 1);
+	read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
+	read->ver = 1;
+	read->campat = 1;
+	read->data_len = cpu_to_le32(sizeof(*read) - 10);
+	spin_lock(&mdsc->metric.read_lock);
+	total = atomic64_read(&mdsc->metric.total_reads),
+	sum = timespec64_to_ns(&mdsc->metric.read_latency_sum);
+	spin_unlock(&mdsc->metric.read_lock);
+	avg = total ? sum / total : 0;
+	ts = ns_to_timespec64(avg);
+	read->sec = cpu_to_le32(ts.tv_sec);
+	read->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	dout("read latency metric total %lld, sum lat %lld, avg lat %lld",
+	     total, sum, avg);
+
+	/* encode the write latency metric */
+	write = (struct ceph_metric_write_latency *)(read + 1);
+	write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
+	write->ver = 1;
+	write->campat = 1;
+	write->data_len = cpu_to_le32(sizeof(*write) - 10);
+	spin_lock(&mdsc->metric.write_lock);
+	total = atomic64_read(&mdsc->metric.total_writes),
+	sum = timespec64_to_ns(&mdsc->metric.write_latency_sum);
+	spin_unlock(&mdsc->metric.write_lock);
+	avg = total ? sum / total : 0;
+	ts = ns_to_timespec64(avg);
+	write->sec = cpu_to_le32(ts.tv_sec);
+	write->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	dout("write latency metric total %lld, sum lat %lld, avg lat %lld",
+	     total, sum, avg);
+
+	/* encode the metadata latency metric */
+	meta = (struct ceph_metric_metadata_latency *)(write + 1);
+	meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
+	meta->ver = 1;
+	meta->campat = 1;
+	meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
+	spin_lock(&mdsc->metric.metadata_lock);
+	total = atomic64_read(&mdsc->metric.total_metadatas),
+	sum = timespec64_to_ns(&mdsc->metric.metadata_latency_sum);
+	spin_unlock(&mdsc->metric.metadata_lock);
+	avg = total ? sum / total : 0;
+	ts = ns_to_timespec64(avg);
+	meta->sec = cpu_to_le32(ts.tv_sec);
+	meta->nsec = cpu_to_le32(ts.tv_nsec);
+	items++;
+
+	dout("metadata latency metric total %lld, sum lat %lld, avg lat %lld",
+	     total, sum, avg);
+
+skip_global:
+	put_unaligned_le32(items, &head->num);
+	msg->front.iov_len = cpu_to_le32(len);
+	msg->hdr.version = cpu_to_le16(1);
+	msg->hdr.compat_version = cpu_to_le16(1);
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	dout("send metrics to mds%d %p\n", s->s_mds, msg);
+	ceph_con_send(&s->s_con, msg);
+
+	return true;
+}
+
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
  */
+#define CEPH_WORK_DELAY_DEF 5
 static void schedule_delayed(struct ceph_mds_client *mdsc)
 {
-	int delay = 5;
-	unsigned hz = round_jiffies_relative(HZ * delay);
+	unsigned int hz;
+	int delay = CEPH_WORK_DELAY_DEF;
+
+	mutex_lock(&mdsc->mutex);
+	if (mdsc->sending_metrics)
+		delay = 1;
+	mutex_unlock(&mdsc->mutex);
+
+	hz = round_jiffies_relative(HZ * delay);
 	schedule_delayed_work(&mdsc->delayed_work, hz);
 }
 
@@ -4165,18 +4314,28 @@  static void delayed_work(struct work_struct *work)
 		container_of(work, struct ceph_mds_client, delayed_work.work);
 	int renew_interval;
 	int renew_caps;
+	bool metric_only;
+	bool sending_metrics;
+	bool g_skip = false;
 
 	dout("mdsc delayed_work\n");
 
 	mutex_lock(&mdsc->mutex);
-	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
-	renew_caps = time_after_eq(jiffies, HZ*renew_interval +
-				   mdsc->last_renew_caps);
-	if (renew_caps)
-		mdsc->last_renew_caps = jiffies;
+	sending_metrics = !!mdsc->sending_metrics;
+	metric_only = mdsc->sending_metrics &&
+		(mdsc->ticks++ % CEPH_WORK_DELAY_DEF);
+
+	if (!metric_only) {
+		renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+		renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+					   mdsc->last_renew_caps);
+		if (renew_caps)
+			mdsc->last_renew_caps = jiffies;
+	}
 
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+
 		if (!s)
 			continue;
 		if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4202,13 +4361,20 @@  static void delayed_work(struct work_struct *work)
 		mutex_unlock(&mdsc->mutex);
 
 		mutex_lock(&s->s_mutex);
-		if (renew_caps)
-			send_renew_caps(mdsc, s);
-		else
-			ceph_con_keepalive(&s->s_con);
-		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
-		    s->s_state == CEPH_MDS_SESSION_HUNG)
-			ceph_send_cap_releases(mdsc, s);
+
+		if (sending_metrics)
+			g_skip = ceph_mdsc_send_metrics(mdsc, s, g_skip);
+
+		if (!metric_only) {
+			if (renew_caps)
+				send_renew_caps(mdsc, s);
+			else
+				ceph_con_keepalive(&s->s_con);
+			if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+					s->s_state == CEPH_MDS_SESSION_HUNG)
+				ceph_send_cap_releases(mdsc, s);
+		}
+
 		mutex_unlock(&s->s_mutex);
 		ceph_put_mds_session(s);
 
@@ -4216,6 +4382,9 @@  static void delayed_work(struct work_struct *work)
 	}
 	mutex_unlock(&mdsc->mutex);
 
+	if (metric_only)
+		goto delay_work;
+
 	ceph_check_delayed_caps(mdsc);
 
 	ceph_queue_cap_reclaim_work(mdsc);
@@ -4224,11 +4393,13 @@  static void delayed_work(struct work_struct *work)
 
 	maybe_recover_session(mdsc);
 
+delay_work:
 	schedule_delayed(mdsc);
 }
 
-static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+static int ceph_mdsc_metric_init(struct ceph_mds_client *mdsc)
 {
+	struct ceph_client_metric *metric = &mdsc->metric;
 	int ret;
 
 	if (!metric)
@@ -4256,6 +4427,8 @@  static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
 	memset(&metric->metadata_latency_sum, 0, sizeof(struct timespec64));
 	atomic64_set(&metric->total_metadatas, 0);
 
+	mdsc->sending_metrics = 0;
+	mdsc->ticks = 0;
 	return 0;
 }
 
@@ -4312,7 +4485,7 @@  int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
 	atomic_set(&mdsc->cap_reclaim_pending, 0);
-	err = ceph_mdsc_metric_init(&mdsc->metric);
+	err = ceph_mdsc_metric_init(mdsc);
 	if (err)
 		goto err_mdsmap;
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ee36ab87e5f6..5bffa4e6ba5d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -468,6 +468,9 @@  struct ceph_mds_client {
 	struct list_head  dentry_leases;     /* fifo list */
 	struct list_head  dentry_dir_leases; /* lru list */
 
+	/* metrics */
+	unsigned int		  sending_metrics;
+	unsigned int		  ticks;
 	struct ceph_client_metric metric;
 
 	spinlock_t		snapid_map_lock;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88da9e21af75..9d2a5f1ce418 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -123,6 +123,7 @@  struct ceph_fs_client {
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+	struct dentry *debugfs_sending_metrics;
 	struct dentry *debugfs_metric;
 	struct dentry *debugfs_mds_sessions;
 #endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index cb21c5cf12c3..a0b227fc99dc 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -130,6 +130,7 @@  struct ceph_dir_layout {
 #define CEPH_MSG_CLIENT_REQUEST         24
 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
 #define CEPH_MSG_CLIENT_REPLY           26
+#define CEPH_MSG_CLIENT_METRICS         29
 #define CEPH_MSG_CLIENT_CAPS            0x310
 #define CEPH_MSG_CLIENT_LEASE           0x311
 #define CEPH_MSG_CLIENT_SNAP            0x312
@@ -752,6 +753,82 @@  struct ceph_mds_lease {
 } __attribute__ ((packed));
 /* followed by a __le32+string for dname */
 
+enum ceph_metric_type {
+	CLIENT_METRIC_TYPE_CAP_INFO,
+	CLIENT_METRIC_TYPE_READ_LATENCY,
+	CLIENT_METRIC_TYPE_WRITE_LATENCY,
+	CLIENT_METRIC_TYPE_METADATA_LATENCY,
+	CLIENT_METRIC_TYPE_DENTRY_LEASE,
+
+	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+};
+
+/* metric caps header */
+struct ceph_metric_cap {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(hit + mis + total) */
+	__le64 hit;
+	__le64 mis;
+	__le64 total;
+} __attribute__ ((packed));
+
+/* metric dentry lease header */
+struct ceph_metric_dentry_lease {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(hit + mis + total) */
+	__le64 hit;
+	__le64 mis;
+	__le64 total;
+} __attribute__ ((packed));
+
+/* metric read latency header */
+struct ceph_metric_read_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __attribute__ ((packed));
+
+/* metric write latency header */
+struct ceph_metric_write_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __attribute__ ((packed));
+
+/* metric metadata latency header */
+struct ceph_metric_metadata_latency {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  campat;
+
+	__le32 data_len; /* length of sizeof(sec + nsec) */
+	__le32 sec;
+	__le32 nsec;
+} __attribute__ ((packed));
+
+struct ceph_metric_head {
+	__le32 num;	/* the number of metrics will be sent */
+} __attribute__ ((packed));
+
 /* client reconnect */
 struct ceph_mds_cap_reconnect {
 	__le64 cap_id;