diff mbox series

[2/6] writeback: support retrieving per group debug writeback stats of bdi

Message ID 20240320110222.6564-3-shikemeng@huaweicloud.com (mailing list archive)
State New
Headers show
Series Improve visibility of writeback | expand

Commit Message

Kemeng Shi March 20, 2024, 11:02 a.m. UTC
Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
of bdi.

Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
---
 include/linux/writeback.h |  1 +
 mm/backing-dev.c          | 66 +++++++++++++++++++++++++++++++++++++++
 mm/page-writeback.c       | 19 +++++++++++
 3 files changed, 86 insertions(+)

Comments

Tejun Heo March 20, 2024, 3:01 p.m. UTC | #1
On Wed, Mar 20, 2024 at 07:02:18PM +0800, Kemeng Shi wrote:
> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> index 9845cb62e40b..bb1ce1123b52 100644
> --- a/include/linux/writeback.h
> +++ b/include/linux/writeback.h
> @@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
>  
>  void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
>  unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb);

Would cgwb_calc_thresh() be an easier name?

Thanks.
Kemeng Shi March 21, 2024, 3:45 a.m. UTC | #2
on 3/20/2024 11:01 PM, Tejun Heo wrote:
> On Wed, Mar 20, 2024 at 07:02:18PM +0800, Kemeng Shi wrote:
>> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
>> index 9845cb62e40b..bb1ce1123b52 100644
>> --- a/include/linux/writeback.h
>> +++ b/include/linux/writeback.h
>> @@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
>>  
>>  void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
>>  unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
>> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb);
> 
> Would cgwb_calc_thresh() be an easier name?
> 
Sure, will rename it in next version. Thansk!
> Thanks.
>
Jan Kara March 26, 2024, 12:24 p.m. UTC | #3
On Wed 20-03-24 19:02:18, Kemeng Shi wrote:
> Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
> of bdi.
> 
> Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>

...

> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb)
> +{
> +	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
> +	struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
> +	unsigned long filepages, headroom, writeback;
> +
> +	gdtc.avail = global_dirtyable_memory();
> +	gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
> +		     global_node_page_state(NR_WRITEBACK);
> +
> +	mem_cgroup_wb_stats(wb, &filepages, &headroom,
> +			    &mdtc.dirty, &writeback);
> +	mdtc.dirty += writeback;
> +	mdtc_calc_avail(&mdtc, filepages, headroom);
> +	domain_dirty_limits(&mdtc);
> +
> +	return __wb_calc_thresh(&mdtc, mdtc.thresh);
> +}

I kind of wish we didn't replicate this logic from balance_dirty_pages()
and wb_over_bg_thresh() into yet another place. But the refactoring would
be kind of difficult. So OK.

								Honza
Kemeng Shi March 26, 2024, 1:26 p.m. UTC | #4
on 3/26/2024 8:24 PM, Jan Kara wrote:
> On Wed 20-03-24 19:02:18, Kemeng Shi wrote:
>> Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
>> of bdi.
>>
>> Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
> 
> ...
> 
>> +unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb)
>> +{
>> +	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
>> +	struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
>> +	unsigned long filepages, headroom, writeback;
>> +
>> +	gdtc.avail = global_dirtyable_memory();
>> +	gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
>> +		     global_node_page_state(NR_WRITEBACK);
>> +
>> +	mem_cgroup_wb_stats(wb, &filepages, &headroom,
>> +			    &mdtc.dirty, &writeback);
>> +	mdtc.dirty += writeback;
>> +	mdtc_calc_avail(&mdtc, filepages, headroom);
>> +	domain_dirty_limits(&mdtc);
>> +
>> +	return __wb_calc_thresh(&mdtc, mdtc.thresh);
>> +}
> 
> I kind of wish we didn't replicate this logic from balance_dirty_pages()
> and wb_over_bg_thresh() into yet another place. But the refactoring would
> be kind of difficult. So OK.
Thanks for review the code.
I have considered about this. It's difficult and will introduce a lot
change to non-debug code which make this series uneasy to review.
I will submit another patch for refactoring if I could find a way to
clean the code.

Kemeng
> 
> 								Honza
>
diff mbox series

Patch

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9845cb62e40b..bb1ce1123b52 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -355,6 +355,7 @@  int dirtytime_interval_handler(struct ctl_table *table, int write,
 
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
+unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb);
 
 void wb_update_bandwidth(struct bdi_writeback *wb);
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 788702b6c5dd..bfc4079dc7fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -95,12 +95,77 @@  static void bdi_collect_stats(struct backing_dev_info *bdi,
 		collect_wb_stats(stats, wb);
 	mutex_unlock(&bdi->cgwb_release_mutex);
 }
+static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb,
+			  struct wb_stats *stats)
+{
+
+	seq_printf(m,
+		   "WbCgIno:           %10lu\n"
+		   "WbWriteback:       %10lu kB\n"
+		   "WbReclaimable:     %10lu kB\n"
+		   "WbDirtyThresh:     %10lu kB\n"
+		   "WbDirtied:         %10lu kB\n"
+		   "WbWritten:         %10lu kB\n"
+		   "WbWriteBandwidth:  %10lu kBps\n"
+		   "b_dirty:           %10lu\n"
+		   "b_io:              %10lu\n"
+		   "b_more_io:         %10lu\n"
+		   "b_dirty_time:      %10lu\n"
+		   "state:             %10lx\n",
+		   cgroup_ino(wb->memcg_css->cgroup),
+		   K(stats->nr_writeback),
+		   K(stats->nr_reclaimable),
+		   K(stats->wb_thresh),
+		   K(stats->nr_dirtied),
+		   K(stats->nr_written),
+		   K(wb->avg_write_bandwidth),
+		   stats->nr_dirty,
+		   stats->nr_io,
+		   stats->nr_more_io,
+		   stats->nr_dirty_time,
+		   wb->state);
+}
+
+static int cgwb_debug_stats_show(struct seq_file *m, void *v)
+{
+	struct backing_dev_info *bdi = m->private;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	struct bdi_writeback *wb;
+	struct wb_stats stats;
+
+	global_dirty_limits(&background_thresh, &dirty_thresh);
+
+	mutex_lock(&bdi->cgwb_release_mutex);
+	list_for_each_entry(wb, &bdi->wb_list, bdi_node) {
+		memset(&stats, 0, sizeof(stats));
+		stats.dirty_thresh = dirty_thresh;
+		collect_wb_stats(&stats, wb);
+
+		if (mem_cgroup_wb_domain(wb) != NULL)
+			stats.wb_thresh = min(stats.wb_thresh, wb_calc_cg_thresh(wb));
+
+		wb_stats_show(m, wb, &stats);
+	}
+	mutex_unlock(&bdi->cgwb_release_mutex);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats);
+
+static void cgwb_debug_register(struct backing_dev_info *bdi)
+{
+	debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi,
+			    &cgwb_debug_stats_fops);
+}
 #else
 static void bdi_collect_stats(struct backing_dev_info *bdi,
 			      struct wb_stats *stats)
 {
 	collect_wb_stats(stats, &bdi->wb);
 }
+
+static inline void cgwb_debug_register(struct backing_dev_info *bdi) { }
 #endif
 
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
@@ -158,6 +223,7 @@  static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
 
 	debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
 			    &bdi_debug_stats_fops);
+	cgwb_debug_register(bdi);
 }
 
 static void bdi_debug_unregister(struct backing_dev_info *bdi)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0e20467367fe..ba1b6b5ae5d6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -893,6 +893,25 @@  unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
 	return __wb_calc_thresh(&gdtc, thresh);
 }
 
+unsigned long wb_calc_cg_thresh(struct bdi_writeback *wb)
+{
+	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
+	struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
+	unsigned long filepages, headroom, writeback;
+
+	gdtc.avail = global_dirtyable_memory();
+	gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
+		     global_node_page_state(NR_WRITEBACK);
+
+	mem_cgroup_wb_stats(wb, &filepages, &headroom,
+			    &mdtc.dirty, &writeback);
+	mdtc.dirty += writeback;
+	mdtc_calc_avail(&mdtc, filepages, headroom);
+	domain_dirty_limits(&mdtc);
+
+	return __wb_calc_thresh(&mdtc, mdtc.thresh);
+}
+
 /*
  *                           setpoint - dirty 3
  *        f(dirty) := 1.0 + (----------------)