mm/backing-dev: show state of all bdi_writeback in debugfs
diff mbox series

Message ID 156388617236.3608.2194886130557491278.stgit@buzz
State New
Headers show
Series
  • mm/backing-dev: show state of all bdi_writeback in debugfs
Related show

Commit Message

Konstantin Khlebnikov July 23, 2019, 12:49 p.m. UTC
Currently /sys/kernel/debug/bdi/$maj:$min/stats shows only root bdi wb.
With CONFIG_CGROUP_WRITEBACK=y there is one for each memory cgroup.

This patch shows here state of each bdi_writeback in form:

<global state>

Id: 1
Cgroup: /
<root wb state>

Id: xxx
Cgroup: /path
<cgroup wb state>

Id: yyy
Cgroup: /path2
<cgroup wb state>

...

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 mm/backing-dev.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 93 insertions(+), 13 deletions(-)

Comments

Andrew Morton July 23, 2019, 8:07 p.m. UTC | #1
On Tue, 23 Jul 2019 15:49:32 +0300 Konstantin Khlebnikov <khlebnikov@yandex-team.ru> wrote:

> Currently /sys/kernel/debug/bdi/$maj:$min/stats shows only root bdi wb.
> With CONFIG_CGROUP_WRITEBACK=y there is one for each memory cgroup.
> 
> This patch shows here state of each bdi_writeback in form:
> 
> <global state>
> 
> Id: 1
> Cgroup: /
> <root wb state>
> 
> Id: xxx
> Cgroup: /path
> <cgroup wb state>
> 
> Id: yyy
> Cgroup: /path2
> <cgroup wb state>

Why is this considered useful?  What are the use cases.  ie, why should
we add this to Linux?

> mm/backing-dev.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 93 insertions(+), 13 deletions(-)

No documentation because it's debugfs, right?

I'm struggling to understand why this is a good thing :(.  If it's
there and people use it then we should document it for them.  If it's
there and people don't use it then we should delete the code.
Konstantin Khlebnikov July 23, 2019, 9:24 p.m. UTC | #2
On Tue, Jul 23, 2019 at 11:07 PM Andrew Morton
<akpm@linux-foundation.org> wrote:
>
> On Tue, 23 Jul 2019 15:49:32 +0300 Konstantin Khlebnikov <khlebnikov@yandex-team.ru> wrote:
>
> > Currently /sys/kernel/debug/bdi/$maj:$min/stats shows only root bdi wb.
> > With CONFIG_CGROUP_WRITEBACK=y there is one for each memory cgroup.
> >
> > This patch shows here state of each bdi_writeback in form:
> >
> > <global state>
> >
> > Id: 1
> > Cgroup: /
> > <root wb state>
> >
> > Id: xxx
> > Cgroup: /path
> > <cgroup wb state>
> >
> > Id: yyy
> > Cgroup: /path2
> > <cgroup wb state>
>
> Why is this considered useful?  What are the use cases.  ie, why should
> we add this to Linux?
>
> > mm/backing-dev.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++-------
> > 1 file changed, 93 insertions(+), 13 deletions(-)
>
> No documentation because it's debugfs, right?
>
> I'm struggling to understand why this is a good thing :(.  If it's
> there and people use it then we should document it for them.  If it's
> there and people don't use it then we should delete the code.
>

Well. Cgroup writeback has huge internal state:
bdi_writeback for each pair (bdi, memory cgroup ) which refers to some
blkio cgroup.
Each of them has writeback rate estimation, bunch of counters for
pages and flows and so on.
All this rich state almost completely hidden and gives no clue when
something goes wrong.
Debugging such dynamic structure with gdb is a pain.

Also all these features are artificially tied with cgroup2 interface
so almost nobody use them right now.

This patch extends legacy debug manhole to expose bit of actual state.
Alternative is exactly removing this debugfs file.

I'm using this debugfs interface for croups and find it very useful:
https://lore.kernel.org/patchwork/patch/973846/
but writeback has another dimension so needs own interface.
Tejun Heo July 23, 2019, 10:36 p.m. UTC | #3
On Wed, Jul 24, 2019 at 12:24:41AM +0300, Konstantin Khlebnikov wrote:
> Debugging such dynamic structure with gdb is a pain.

Use drgn.  It's a lot better than hard coding these debug features
into the kernel.

  https://github.com/osandov/drgn

Thanks.

Patch
diff mbox series

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e8e89158adec..3e752c4bafaf 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -45,7 +45,7 @@  static void bdi_debug_init(void)
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
 	struct backing_dev_info *bdi = m->private;
-	struct bdi_writeback *wb = &bdi->wb;
+	struct bdi_writeback *wb = v;
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long wb_thresh;
@@ -65,43 +65,123 @@  static int bdi_debug_stats_show(struct seq_file *m, void *v)
 			nr_dirty_time++;
 	spin_unlock(&wb->list_lock);
 
-	global_dirty_limits(&background_thresh, &dirty_thresh);
-	wb_thresh = wb_calc_thresh(wb, dirty_thresh);
-
 #define K(x) ((x) << (PAGE_SHIFT - 10))
+
+	/* global state */
+	if (wb == &bdi->wb) {
+		global_dirty_limits(&background_thresh, &dirty_thresh);
+		wb_thresh = wb_calc_thresh(wb, dirty_thresh);
+		seq_printf(m,
+			   "BdiDirtyThresh:     %10lu kB\n"
+			   "DirtyThresh:        %10lu kB\n"
+			   "BackgroundThresh:   %10lu kB\n"
+			   "bdi_list:           %10u\n",
+			   K(wb_thresh),
+			   K(dirty_thresh),
+			   K(background_thresh),
+			   !list_empty(&bdi->bdi_list));
+	}
+
+	/* cgroup header */
+#ifdef CONFIG_CGROUP_WRITEBACK
+	if (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) {
+		size_t buflen, len;
+		char *buf;
+
+		seq_printf(m, "\nId: %d\nCgroup: ", wb->memcg_css->id);
+		buflen = seq_get_buf(m, &buf);
+		if (buf) {
+			len = cgroup_path(wb->memcg_css->cgroup, buf, buflen);
+			seq_commit(m, len <= buflen ? len : -1);
+			seq_putc(m, '\n');
+		}
+	}
+#endif /* CONFIG_CGROUP_WRITEBACK */
+
 	seq_printf(m,
 		   "BdiWriteback:       %10lu kB\n"
 		   "BdiReclaimable:     %10lu kB\n"
-		   "BdiDirtyThresh:     %10lu kB\n"
-		   "DirtyThresh:        %10lu kB\n"
-		   "BackgroundThresh:   %10lu kB\n"
 		   "BdiDirtied:         %10lu kB\n"
 		   "BdiWritten:         %10lu kB\n"
 		   "BdiWriteBandwidth:  %10lu kBps\n"
+		   "BdiAvgWriteBwidth:  %10lu kBps\n"
 		   "b_dirty:            %10lu\n"
 		   "b_io:               %10lu\n"
 		   "b_more_io:          %10lu\n"
 		   "b_dirty_time:       %10lu\n"
-		   "bdi_list:           %10u\n"
 		   "state:              %10lx\n",
 		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
 		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
-		   K(wb_thresh),
-		   K(dirty_thresh),
-		   K(background_thresh),
 		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
 		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
 		   (unsigned long) K(wb->write_bandwidth),
+		   (unsigned long) K(wb->avg_write_bandwidth),
 		   nr_dirty,
 		   nr_io,
 		   nr_more_io,
 		   nr_dirty_time,
-		   !list_empty(&bdi->bdi_list), bdi->wb.state);
+		   wb->state);
 #undef K
 
 	return 0;
 }
-DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
+
+static void *bdi_debug_stats_start(struct seq_file *m, loff_t *ppos)
+{
+	struct backing_dev_info *bdi = m->private;
+	struct bdi_writeback *wb;
+	loff_t pos = *ppos;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
+		if (pos-- == 0)
+			return wb;
+	return NULL;
+}
+
+static void *bdi_debug_stats_next(struct seq_file *m, void *v, loff_t *ppos)
+{
+	struct backing_dev_info *bdi = m->private;
+	struct bdi_writeback *wb = v;
+
+	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
+		++*ppos;
+		return wb;
+	}
+	return NULL;
+}
+
+static void bdi_debug_stats_stop(struct seq_file *m, void *v)
+{
+	rcu_read_unlock();
+}
+
+static const struct seq_operations bdi_debug_stats_seq_ops = {
+	.start	= bdi_debug_stats_start,
+	.next	= bdi_debug_stats_next,
+	.stop	= bdi_debug_stats_stop,
+	.show	= bdi_debug_stats_show,
+};
+
+static int bdi_debug_stats_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &bdi_debug_stats_seq_ops);
+	if (!ret) {
+		m = file->private_data;
+		m->private = inode->i_private;
+	}
+	return ret;
+}
+
+static const struct file_operations bdi_debug_stats_fops = {
+	.open		= bdi_debug_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
 
 static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
 {