diff mbox series

[4/4] ceph: add enable/disable sending metrics to MDS debugfs support

Message ID 20191224040514.26144-5-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: add caps and dentry lease perf metrics support | expand

Commit Message

Xiubo Li Dec. 24, 2019, 4:05 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Disabled as default, if it's enabled the kclient will send metrics
every second.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/debugfs.c    | 44 ++++++++++++++++++++++++++++++--
 fs/ceph/mds_client.c | 60 +++++++++++++++++++++++++++++++-------------
 fs/ceph/mds_client.h |  3 +++
 fs/ceph/super.h      |  1 +
 4 files changed, 89 insertions(+), 19 deletions(-)

Comments

Ilya Dryomov Jan. 7, 2020, 11:13 a.m. UTC | #1
On Tue, Dec 24, 2019 at 5:05 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> Disabled as default, if it's enabled the kclient will send metrics
> every second.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/debugfs.c    | 44 ++++++++++++++++++++++++++++++--
>  fs/ceph/mds_client.c | 60 +++++++++++++++++++++++++++++++-------------
>  fs/ceph/mds_client.h |  3 +++
>  fs/ceph/super.h      |  1 +
>  4 files changed, 89 insertions(+), 19 deletions(-)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index c132fdb40d53..a26e559473fd 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -124,6 +124,40 @@ static int mdsc_show(struct seq_file *s, void *p)
>         return 0;
>  }
>
> +/*
> + * metrics debugfs
> + */
> +static int sending_metrics_set(void *data, u64 val)
> +{
> +       struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
> +       struct ceph_mds_client *mdsc = fsc->mdsc;
> +
> +       if (val > 1) {
> +               pr_err("Invalid sending metrics set value %llu\n", val);
> +               return -EINVAL;
> +       }
> +
> +       mutex_lock(&mdsc->mutex);
> +       mdsc->sending_metrics = (unsigned int)val;
> +       mutex_unlock(&mdsc->mutex);
> +
> +       return 0;
> +}
> +
> +static int sending_metrics_get(void *data, u64 *val)
> +{
> +       struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
> +       struct ceph_mds_client *mdsc = fsc->mdsc;
> +
> +       mutex_lock(&mdsc->mutex);
> +       *val = (u64)mdsc->sending_metrics;
> +       mutex_unlock(&mdsc->mutex);
> +
> +       return 0;
> +}
> +DEFINE_DEBUGFS_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
> +                        sending_metrics_set, "%llu\n");
> +
>  static int metric_show(struct seq_file *s, void *p)
>  {
>         struct ceph_fs_client *fsc = s->private;
> @@ -279,11 +313,9 @@ static int congestion_kb_get(void *data, u64 *val)
>         *val = (u64)fsc->mount_options->congestion_kb;
>         return 0;
>  }
> -
>  DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
>                         congestion_kb_set, "%llu\n");
>
> -
>  void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>  {
>         dout("ceph_fs_debugfs_cleanup\n");
> @@ -293,6 +325,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>         debugfs_remove(fsc->debugfs_mds_sessions);
>         debugfs_remove(fsc->debugfs_caps);
>         debugfs_remove(fsc->debugfs_metric);
> +       debugfs_remove(fsc->debugfs_sending_metrics);
>         debugfs_remove(fsc->debugfs_mdsc);
>  }
>
> @@ -333,6 +366,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>                                                 fsc,
>                                                 &mdsc_show_fops);
>
> +       fsc->debugfs_sending_metrics =
> +                       debugfs_create_file_unsafe("sending_metrics",
> +                                                  0600,
> +                                                  fsc->client->debugfs_dir,
> +                                                  fsc,
> +                                                  &sending_metrics_fops);

Hi Xiubo,

Same question as to Chen.  Why are you using the unsafe variant
with DEFINE_DEBUGFS_ATTRIBUTE instead of just mirroring the existing
writeback_congestion_kb?  Have you verified that it is safe?

I was a little confused by this series as a whole too.  Patch 3 says
that these metrics will be sent every 5 seconds, which matches the caps
tick interval.  This patch changes that to 1 second and makes sending
metrics optional.  Perhaps merge patches 3 and 4 into a single patch
with a better changelog?

Do we really need to send metrics more often than we potentially renew
caps?

Thanks,

                Ilya
Xiubo Li Jan. 7, 2020, 1:19 p.m. UTC | #2
On 2020/1/7 19:13, Ilya Dryomov wrote:
> On Tue, Dec 24, 2019 at 5:05 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Disabled as default, if it's enabled the kclient will send metrics
>> every second.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/debugfs.c    | 44 ++++++++++++++++++++++++++++++--
>>   fs/ceph/mds_client.c | 60 +++++++++++++++++++++++++++++++-------------
>>   fs/ceph/mds_client.h |  3 +++
>>   fs/ceph/super.h      |  1 +
>>   4 files changed, 89 insertions(+), 19 deletions(-)
>>
>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>> index c132fdb40d53..a26e559473fd 100644
>> --- a/fs/ceph/debugfs.c
>> +++ b/fs/ceph/debugfs.c
>> @@ -124,6 +124,40 @@ static int mdsc_show(struct seq_file *s, void *p)
>>          return 0;
>>   }
>>
>> +/*
>> + * metrics debugfs
>> + */
>> +static int sending_metrics_set(void *data, u64 val)
>> +{
>> +       struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
>> +       struct ceph_mds_client *mdsc = fsc->mdsc;
>> +
>> +       if (val > 1) {
>> +               pr_err("Invalid sending metrics set value %llu\n", val);
>> +               return -EINVAL;
>> +       }
>> +
>> +       mutex_lock(&mdsc->mutex);
>> +       mdsc->sending_metrics = (unsigned int)val;
>> +       mutex_unlock(&mdsc->mutex);
>> +
>> +       return 0;
>> +}
>> +
>> +static int sending_metrics_get(void *data, u64 *val)
>> +{
>> +       struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
>> +       struct ceph_mds_client *mdsc = fsc->mdsc;
>> +
>> +       mutex_lock(&mdsc->mutex);
>> +       *val = (u64)mdsc->sending_metrics;
>> +       mutex_unlock(&mdsc->mutex);
>> +
>> +       return 0;
>> +}
>> +DEFINE_DEBUGFS_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
>> +                        sending_metrics_set, "%llu\n");
>> +
>>   static int metric_show(struct seq_file *s, void *p)
>>   {
>>          struct ceph_fs_client *fsc = s->private;
>> @@ -279,11 +313,9 @@ static int congestion_kb_get(void *data, u64 *val)
>>          *val = (u64)fsc->mount_options->congestion_kb;
>>          return 0;
>>   }
>> -
>>   DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
>>                          congestion_kb_set, "%llu\n");
>>
>> -
>>   void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>>   {
>>          dout("ceph_fs_debugfs_cleanup\n");
>> @@ -293,6 +325,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>>          debugfs_remove(fsc->debugfs_mds_sessions);
>>          debugfs_remove(fsc->debugfs_caps);
>>          debugfs_remove(fsc->debugfs_metric);
>> +       debugfs_remove(fsc->debugfs_sending_metrics);
>>          debugfs_remove(fsc->debugfs_mdsc);
>>   }
>>
>> @@ -333,6 +366,13 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>>                                                  fsc,
>>                                                  &mdsc_show_fops);
>>
>> +       fsc->debugfs_sending_metrics =
>> +                       debugfs_create_file_unsafe("sending_metrics",
>> +                                                  0600,
>> +                                                  fsc->client->debugfs_dir,
>> +                                                  fsc,
>> +                                                  &sending_metrics_fops);
> Hi Xiubo,
>
> Same question as to Chen.  Why are you using the unsafe variant
> with DEFINE_DEBUGFS_ATTRIBUTE instead of just mirroring the existing
> writeback_congestion_kb?  Have you verified that it is safe?

Just copied the code from some other place.

Any struct file_operations defined by means of the 
DEFINE_DEBUGFS_ATTRIBUTE macro is protected against file removals, so it 
should be okay.

> I was a little confused by this series as a whole too.  Patch 3 says
> that these metrics will be sent every 5 seconds, which matches the caps
> tick interval.  This patch changes that to 1 second and makes sending
> metrics optional.  Perhaps merge patches 3 and 4 into a single patch
> with a better changelog?

Just to make each patch as small as possible, which will be easier to 
review.

I am okay to merge them with a better changelog.

>
> Do we really need to send metrics more often than we potentially renew
> caps?

I had the same question, while the user client is tied to Client::tick() 
and sending the metrics every second.

Here will just keep it the same with the user client logic, but per 
second is not a must.

Thanks.


> Thanks,
>
>                  Ilya
>
diff mbox series

Patch

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index c132fdb40d53..a26e559473fd 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -124,6 +124,40 @@  static int mdsc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+/*
+ * metrics debugfs
+ */
+static int sending_metrics_set(void *data, u64 val)
+{
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	if (val > 1) {
+		pr_err("Invalid sending metrics set value %llu\n", val);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mdsc->mutex);
+	mdsc->sending_metrics = (unsigned int)val;
+	mutex_unlock(&mdsc->mutex);
+
+	return 0;
+}
+
+static int sending_metrics_get(void *data, u64 *val)
+{
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	mutex_lock(&mdsc->mutex);
+	*val = (u64)mdsc->sending_metrics;
+	mutex_unlock(&mdsc->mutex);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(sending_metrics_fops, sending_metrics_get,
+			 sending_metrics_set, "%llu\n");
+
 static int metric_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
@@ -279,11 +313,9 @@  static int congestion_kb_get(void *data, u64 *val)
 	*val = (u64)fsc->mount_options->congestion_kb;
 	return 0;
 }
-
 DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
 			congestion_kb_set, "%llu\n");
 
-
 void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
 	dout("ceph_fs_debugfs_cleanup\n");
@@ -293,6 +325,7 @@  void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_mds_sessions);
 	debugfs_remove(fsc->debugfs_caps);
 	debugfs_remove(fsc->debugfs_metric);
+	debugfs_remove(fsc->debugfs_sending_metrics);
 	debugfs_remove(fsc->debugfs_mdsc);
 }
 
@@ -333,6 +366,13 @@  void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						fsc,
 						&mdsc_show_fops);
 
+	fsc->debugfs_sending_metrics =
+			debugfs_create_file_unsafe("sending_metrics",
+						   0600,
+						   fsc->client->debugfs_dir,
+						   fsc,
+						   &sending_metrics_fops);
+
 	fsc->debugfs_metric = debugfs_create_file("metrics",
 						  0400,
 						  fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5b74202ed68f..d31612bdc1e3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4162,10 +4162,18 @@  static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 /*
  * delayed work -- periodically trim expired leases, renew caps with mds
  */
+#define CEPH_WORK_DELAY_DEF 5
 static void schedule_delayed(struct ceph_mds_client *mdsc)
 {
-	int delay = 5;
-	unsigned hz = round_jiffies_relative(HZ * delay);
+	unsigned int hz;
+	int delay = CEPH_WORK_DELAY_DEF;
+
+	mutex_lock(&mdsc->mutex);
+	if (mdsc->sending_metrics)
+		delay = 1;
+	mutex_unlock(&mdsc->mutex);
+
+	hz = round_jiffies_relative(HZ * delay);
 	schedule_delayed_work(&mdsc->delayed_work, hz);
 }
 
@@ -4176,15 +4184,23 @@  static void delayed_work(struct work_struct *work)
 		container_of(work, struct ceph_mds_client, delayed_work.work);
 	int renew_interval;
 	int renew_caps;
+	bool metric_only;
+	bool sending_metrics;
 
 	dout("mdsc delayed_work\n");
 
 	mutex_lock(&mdsc->mutex);
-	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
-	renew_caps = time_after_eq(jiffies, HZ*renew_interval +
-				   mdsc->last_renew_caps);
-	if (renew_caps)
-		mdsc->last_renew_caps = jiffies;
+	sending_metrics = !!mdsc->sending_metrics;
+	metric_only = mdsc->sending_metrics &&
+		(mdsc->ticks++ % CEPH_WORK_DELAY_DEF);
+
+	if (!metric_only) {
+		renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+		renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+					   mdsc->last_renew_caps);
+		if (renew_caps)
+			mdsc->last_renew_caps = jiffies;
+	}
 
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
@@ -4216,15 +4232,18 @@  static void delayed_work(struct work_struct *work)
 
 		mutex_lock(&s->s_mutex);
 
-		g_skip = ceph_mdsc_send_metrics(mdsc, s, g_skip);
+		if (sending_metrics)
+			g_skip = ceph_mdsc_send_metrics(mdsc, s, g_skip);
 
-		if (renew_caps)
-			send_renew_caps(mdsc, s);
-		else
-			ceph_con_keepalive(&s->s_con);
-		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
-		    s->s_state == CEPH_MDS_SESSION_HUNG)
-			ceph_send_cap_releases(mdsc, s);
+		if (!metric_only) {
+			if (renew_caps)
+				send_renew_caps(mdsc, s);
+			else
+				ceph_con_keepalive(&s->s_con);
+			if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+					s->s_state == CEPH_MDS_SESSION_HUNG)
+				ceph_send_cap_releases(mdsc, s);
+		}
 
 		mutex_unlock(&s->s_mutex);
 		ceph_put_mds_session(s);
@@ -4233,6 +4252,9 @@  static void delayed_work(struct work_struct *work)
 	}
 	mutex_unlock(&mdsc->mutex);
 
+	if (metric_only)
+		goto delay_work;
+
 	ceph_check_delayed_caps(mdsc);
 
 	ceph_queue_cap_reclaim_work(mdsc);
@@ -4241,11 +4263,13 @@  static void delayed_work(struct work_struct *work)
 
 	maybe_recover_session(mdsc);
 
+delay_work:
 	schedule_delayed(mdsc);
 }
 
-static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
+static int ceph_mdsc_metric_init(struct ceph_mds_client *mdsc)
 {
+	struct ceph_client_metric *metric = &mdsc->metric;
 	int ret;
 
 	if (!metric)
@@ -4259,6 +4283,8 @@  static int ceph_mdsc_metric_init(struct ceph_client_metric *metric)
 		goto err;
 
 	atomic64_set(&metric->total_dentries, 0);
+	mdsc->sending_metrics = 0;
+	mdsc->ticks = 0;
 	return 0;
 
 err:
@@ -4319,7 +4345,7 @@  int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
 	atomic_set(&mdsc->cap_reclaim_pending, 0);
-	err = ceph_mdsc_metric_init(&mdsc->metric);
+	err = ceph_mdsc_metric_init(mdsc);
 	if (err)
 		goto err_mdsmap;
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 6b91f20132c0..c926256fc76b 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -447,6 +447,9 @@  struct ceph_mds_client {
 	struct list_head  dentry_leases;     /* fifo list */
 	struct list_head  dentry_dir_leases; /* lru list */
 
+	/* metrics */
+	unsigned int		  sending_metrics;
+	unsigned int		  ticks;
 	struct ceph_client_metric metric;
 
 	spinlock_t		snapid_map_lock;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88da9e21af75..9d2a5f1ce418 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -123,6 +123,7 @@  struct ceph_fs_client {
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
+	struct dentry *debugfs_sending_metrics;
 	struct dentry *debugfs_metric;
 	struct dentry *debugfs_mds_sessions;
 #endif