writeback: memcg: simplify cgroup_writeback_by_id

Message ID: 20210722182627.2267368-1-shakeelb@google.com (mailing list archive)
State: New
Series: writeback: memcg: simplify cgroup_writeback_by_id

Commit Message

Shakeel Butt July 22, 2021, 6:26 p.m. UTC
Currently cgroup_writeback_by_id() calls mem_cgroup_wb_stats() to get
the number of dirty pages for a memcg. However, mem_cgroup_wb_stats()
does a lot more than that. Instead, get the number of dirty pages
directly. Also, since cgroup_writeback_by_id() is only called for
best-effort dirty flushing, remove the unused 'nr' parameter; there is
no need to explicitly flush the memcg stats either.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 fs/fs-writeback.c          | 20 +++++++++-----------
 include/linux/memcontrol.h | 15 +++++++++++++++
 include/linux/writeback.h  |  2 +-
 mm/memcontrol.c            | 13 +------------
 4 files changed, 26 insertions(+), 24 deletions(-)
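
For illustration only (this is a standalone userspace sketch, not kernel
code and not part of the patch): the flush target the patch computes is
the current dirty page count inflated by 25%, done in integer arithmetic
as dirty * 10 / 8.

#include <stdio.h>

/* Demo of the patch's flush-target estimate: dirty * 10 / 8 == 1.25x,
 * rounded down. Multiply-before-divide keeps the factor exact in
 * integer math, which matters since the kernel avoids floating point. */
static unsigned long flush_target(unsigned long dirty)
{
	return dirty * 10 / 8;
}

int main(void)
{
	printf("%lu\n", flush_target(1024));	/* prints 1280 */
	return 0;
}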

Comments

Jan Kara July 26, 2021, 2:56 p.m. UTC | #1
On Thu 22-07-21 11:26:27, Shakeel Butt wrote:
> Currently cgroup_writeback_by_id() calls mem_cgroup_wb_stats() to get
> the number of dirty pages for a memcg. However, mem_cgroup_wb_stats()
> does a lot more than that. Instead, get the number of dirty pages
> directly. Also, since cgroup_writeback_by_id() is only called for
> best-effort dirty flushing, remove the unused 'nr' parameter; there is
> no need to explicitly flush the memcg stats either.
> 
> Signed-off-by: Shakeel Butt <shakeelb@google.com>

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

Patch

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 867984e778c3..35894a2dba75 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1039,20 +1039,20 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
  * @bdi_id: target bdi id
  * @memcg_id: target memcg css id
- * @nr: number of pages to write, 0 for best-effort dirty flushing
  * @reason: reason why some writeback work initiated
  * @done: target wb_completion
  *
  * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
  * with the specified parameters.
  */
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done)
 {
 	struct backing_dev_info *bdi;
 	struct cgroup_subsys_state *memcg_css;
 	struct bdi_writeback *wb;
 	struct wb_writeback_work *work;
+	unsigned long dirty;
 	int ret;
 
 	/* lookup bdi and memcg */
@@ -1081,24 +1081,22 @@ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
 	}
 
 	/*
-	 * If @nr is zero, the caller is attempting to write out most of
+	 * The caller is attempting to write out most of
 	 * the currently dirty pages.  Let's take the current dirty page
 	 * count and inflate it by 25% which should be large enough to
 	 * flush out most dirty pages while avoiding getting livelocked by
 	 * concurrent dirtiers.
+	 *
+	 * BTW the memcg stats are flushed periodically and this is best-effort
+	 * estimation, so some potential error is ok.
 	 */
-	if (!nr) {
-		unsigned long filepages, headroom, dirty, writeback;
-
-		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
-				      &writeback);
-		nr = dirty * 10 / 8;
-	}
+	dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
+	dirty = dirty * 10 / 8;
 
 	/* issue the writeback work */
 	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
 	if (work) {
-		work->nr_pages = nr;
+		work->nr_pages = dirty;
 		work->sync_mode = WB_SYNC_NONE;
 		work->range_cyclic = 1;
 		work->reason = reason;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b4c6b613e162..7028d8e4a3d7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -989,6 +989,16 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	long x = READ_ONCE(memcg->vmstats.state[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
@@ -1444,6 +1454,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 {
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	return 0;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 1f34ddf284dc..109e0dcd1d21 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -218,7 +218,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 			      size_t bytes);
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
 bool cleanup_offline_cgwb(struct bdi_writeback *wb);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 35bb5f8f9ea8..6580c2381a3e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -631,17 +631,6 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 	cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
 }
 
-/* idx can be of type enum memcg_stat_item or node_stat_item. */
-static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
-	long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
-
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
@@ -4609,7 +4598,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
 		    atomic_read(&frn->done.cnt) == 1) {
 			frn->at = 0;
 			trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
-			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
 					       WB_REASON_FOREIGN_FLUSH,
 					       &frn->done);
 		}
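
For context, a standalone sketch (again userspace, not kernel code) of
why the inline memcg_page_state() added above clamps negative values
under CONFIG_SMP: memcg counters are updated with per-CPU batching, so
the aggregated signed value can be transiently negative when reads race
with updates; on UP there is no such skew, so the check compiles away.

#include <stdio.h>

/* Sketch of the clamp: the caller wants an unsigned page count, but
 * per-CPU skew on SMP can make the summed counter briefly negative,
 * so negatives are treated as zero rather than wrapping around. */
static unsigned long page_state_clamped(long x)
{
	if (x < 0)	/* transient per-CPU skew, not real state */
		x = 0;
	return (unsigned long)x;
}

int main(void)
{
	printf("%lu\n", page_state_clamped(-3));	/* prints 0   */
	printf("%lu\n", page_state_clamped(128));	/* prints 128 */
	return 0;
}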