[066/212] writeback: memcg: simplify cgroup_writeback_by_id

Message ID	20210902215327.cFSNXjGv5%akpm@linux-foundation.org (mailing list archive)
State	New
Headers	show Return-Path: <SRS0=eNaS=NY=kvack.org=owner-linux-mm@kernel.org> DMARC-Filter: OpenDMARC Filter v1.4.1 mail.kernel.org BCD3361153 Date: Thu, 02 Sep 2021 14:53:27 -0700 From: Andrew Morton <akpm@linux-foundation.org> To: akpm@linux-foundation.org, hannes@cmpxchg.org, jack@suse.cz, linux-mm@kvack.org, mm-commits@vger.kernel.org, shakeelb@google.com, tj@kernel.org, torvalds@linux-foundation.org Subject: [patch 066/212] writeback: memcg: simplify cgroup_writeback_by_id Message-ID: <20210902215327.cFSNXjGv5%akpm@linux-foundation.org> In-Reply-To: <20210902144820.78957dff93d7bea620d55a89@linux-foundation.org> User-Agent: s-nail v14.8.16 Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	[001/212] ia64: fix typo in a comment \| expand [001/212] ia64: fix typo in a comment [002/212] ia64: fix #endif comment for reserve_elfcorehdr() [003/212] ia64: make reserve_elfcorehdr() static [004/212] ia64: make num_rsvd_regions static [005/212] ocfs2: remove an unnecessary condition [006/212] ocfs2: quota_local: fix possible uninitialized-variable access in ocfs2_local_read_info() [007/212] ocfs2: ocfs2_downconvert_lock failure results in deadlock [008/212] arch/csky/kernel/probes/kprobes.c: fix bugon.cocci warnings [009/212] mm, slub: don't call flush_all() from slab_debug_trace_open() [010/212] mm, slub: allocate private object map for debugfs listings [011/212] mm, slub: allocate private object map for validate_slab_cache() [012/212] mm, slub: don't disable irq for debug_check_no_locks_freed() [013/212] mm, slub: remove redundant unfreeze_partials() from put_cpu_partial() [014/212] mm, slub: unify cmpxchg_double_slab() and __cmpxchg_double_slab() [015/212] mm, slub: extract get_partial() from new_slab_objects() [016/212] mm, slub: dissolve new_slab_objects() into ___slab_alloc() [017/212] mm, slub: return slab page from get_partial() and set c->page afterwards [018/212] mm, slub: restructure new page checks in ___slab_alloc() [019/212] mm, slub: simplify kmem_cache_cpu and tid setup [020/212] mm, slub: move disabling/enabling irqs to ___slab_alloc() [021/212] mm, slub: do initial checks in ___slab_alloc() with irqs enabled [022/212] mm, slub: move disabling irqs closer to get_partial() in ___slab_alloc() [023/212] mm, slub: restore irqs around calling new_slab() [024/212] mm, slub: validate slab from partial list or page allocator before making it cpu slab [025/212] mm, slub: check new pages with restored irqs [026/212] mm, slub: stop disabling irqs around get_partial() [027/212] mm, slub: move reset of c->page and freelist out of deactivate_slab() [028/212] mm, slub: make locking in deactivate_slab() irq-safe [029/212] mm, slub: call 
deactivate_slab() without disabling irqs [030/212] mm, slub: move irq control into unfreeze_partials() [031/212] mm, slub: discard slabs in unfreeze_partials() without irqs disabled [032/212] mm, slub: detach whole partial list at once in unfreeze_partials() [033/212] mm, slub: separate detaching of partial list in unfreeze_partials() from unfreezing [034/212] mm, slub: only disable irq with spin_lock in __unfreeze_partials() [035/212] mm, slub: don't disable irqs in slub_cpu_dead() [036/212] mm, slab: make flush_slab() possible to call with irqs enabled [037/212] mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context [038/212] mm: slub: make object_map_lock a raw_spinlock_t [039/212] mm, slub: optionally save/restore irqs in slab_[un]lock()/ [040/212] mm, slub: make slab_lock() disable irqs with PREEMPT_RT [041/212] mm, slub: protect put_cpu_partial() with disabled irqs instead of cmpxchg [042/212] mm, slub: use migrate_disable() on PREEMPT_RT [043/212] mm, slub: convert kmem_cpu_slab protection to local_lock [044/212] mm/debug_vm_pgtable: introduce struct pgtable_debug_args [045/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in basic tests [046/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in leaf and savewrite tests [047/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in protnone and devmap tests [048/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in soft_dirty and swap tests [049/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in migration and thp tests [050/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in PTE modifying tests [051/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in PMD modifying tests [052/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in PUD modifying tests [053/212] mm/debug_vm_pgtable: use struct pgtable_debug_args in PGD and P4D modifying tests [054/212] mm/debug_vm_pgtable: remove unused code [055/212] mm/debug_vm_pgtable: fix corrupted 
page flag [056/212] mm: report a more useful address for reclaim acquisition [057/212] mm: add kernel_misc_reclaimable in show_free_areas [058/212] writeback: track number of inodes under writeback [059/212] writeback: reliably update bandwidth estimation [060/212] writeback: fix bandwidth estimate for spiky workload [061/212] writeback: rename domain_update_bandwidth() [062/212] writeback: use READ_ONCE for unlocked reads of writeback stats [063/212] mm: remove irqsave/restore locking from contexts with irqs enabled [064/212] fs: drop_caches: fix skipping over shadow cache inodes [065/212] fs: inode: count invalidated shadow pages in pginodesteal [066/212] writeback: memcg: simplify cgroup_writeback_by_id [067/212] include/linux/buffer_head.h: fix boolreturn.cocci warnings [068/212] mm: gup: remove set but unused local variable major [069/212] mm: gup: remove unneed local variable orig_refs [070/212] mm: gup: remove useless BUG_ON in __get_user_pages() [071/212] mm: gup: fix potential pgmap refcnt leak in __gup_device_huge() [072/212] mm: gup: use helper PAGE_ALIGNED in populate_vma_page_range() [073/212] mm/gup: documentation corrections for gup/pup [074/212] mm/gup: small refactoring: simplify try_grab_page() [075/212] mm/gup: remove try_get_page(), call try_get_compound_head() directly [076/212] fs, mm: fix race in unlinking swapfile [077/212] mm: delete unused get_kernel_page() [078/212] shmem: use raw_spinlock_t for ->stat_lock [079/212] shmem: remove unneeded variable ret [080/212] shmem: remove unneeded header file [081/212] shmem: remove unneeded function forward declaration [082/212] shmem: include header file to declare swap_info [083/212] huge tmpfs: fix fallocate(vanilla) advance over huge pages [084/212] huge tmpfs: fix split_huge_page() after FALLOC_FL_KEEP_SIZE [085/212] huge tmpfs: remove shrinklist addition from shmem_setattr() [086/212] huge tmpfs: revert shmem's use of transhuge_vma_enabled() [087/212] huge tmpfs: move shmem_huge_enabled() 
upwards [088/212] huge tmpfs: SGP_NOALLOC to stop collapse_file() on race [089/212] huge tmpfs: shmem_is_huge(vma, inode, index) [090/212] huge tmpfs: decide stat.st_blksize by shmem_is_huge() [091/212] shmem: shmem_writepage() split unlikely i915 THP [092/212] mm, memcg: add mem_cgroup_disabled checks in vmpressure and swap-related functions [093/212] mm, memcg: inline mem_cgroup_{charge/uncharge} to improve disabled memcg config [094/212] mm, memcg: inline swap-related functions to improve disabled memcg config [095/212] memcg: enable accounting for pids in nested pid namespaces [096/212] memcg: switch lruvec stats to rstat [097/212] memcg: infrastructure to flush memcg stats [098/212] memcg: charge fs_context and legacy_fs_context [099/212] memcg: enable accounting for mnt_cache entries [100/212] memcg: enable accounting for pollfd and select bits arrays [101/212] memcg: enable accounting for file lock caches [102/212] memcg: enable accounting for fasync_cache [103/212] memcg: enable accounting for new namesapces and struct nsproxy [104/212] memcg: enable accounting of ipc resources [105/212] memcg: enable accounting for signals [106/212] memcg: enable accounting for posix_timers_cache slab [107/212] memcg: enable accounting for ldt_struct objects [108/212] memcg: cleanup racy sum avoidance code [109/212] memcg: replace in_interrupt() by !in_task() in active_memcg() [110/212] mm: memcontrol: set the correct memcg swappiness restriction [111/212] mm, memcg: remove unused functions [112/212] mm, memcg: save some atomic ops when flush is already true [113/212] memcg: fix up drain_local_stock comment [114/212] memcg: make memcg->event_list_lock irqsafe [115/212] selftests/vm: use kselftest skip code for skipped tests [116/212] selftests: Fix spelling mistake "cann't" -> "cannot" [117/212] lazy tlb: introduce lazy mm refcount helper functions [118/212] lazy tlb: allow lazy tlb mm refcounting to be configurable [119/212] lazy tlb: shoot lazies, a non-refcounting lazy 
tlb option [120/212] powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN [121/212] mmc: JZ4740: remove the flush_kernel_dcache_page call in jz4740_mmc_read_data [122/212] mmc: mmc_spi: replace flush_kernel_dcache_page with flush_dcache_page [123/212] scatterlist: replace flush_kernel_dcache_page with flush_dcache_page [125/212] mm,do_huge_pmd_numa_page: remove unnecessary TLB flushing code [126/212] mm: change fault_in_pages_* to have an unsigned size parameter [127/212] mm/pagemap: add mmap_assert_locked() annotations to find_vma() [128/212] remap_file_pages: Use vma_lookup() instead of find_vma() [129/212] mm/mremap: fix memory account on do_munmap() failure [130/212] mm/bootmem_info.c: mark __init on register_page_bootmem_info_section [131/212] mm: sparse: pass section_nr to section_mark_present [132/212] mm: sparse: pass section_nr to find_memory_block [133/212] mm: sparse: remove __section_nr() function [134/212] mm/sparse: set SECTION_NID_SHIFT to 6 [135/212] include/linux/mmzone.h: avoid a warning in sparse memory support [136/212] mm/sparse: clarify pgdat_to_phys [137/212] mm/vmalloc: use batched page requests in bulk-allocator [138/212] mm/vmalloc: remove gfpflags_allow_blocking() check [139/212] lib/test_vmalloc.c: add a new 'nr_pages' parameter [140/212] mm/vmalloc: fix wrong behavior in vread [141/212] mm/kasan: move kasan.fault to mm/kasan/report.c [142/212] kasan: test: rework kmalloc_oob_right [143/212] kasan: test: avoid writing invalid memory [144/212] kasan: test: avoid corrupting memory via memset [145/212] kasan: test: disable kmalloc_memmove_invalid_size for HW_TAGS [146/212] kasan: test: only do kmalloc_uaf_memset for generic mode [147/212] kasan: test: clean up ksize_uaf [148/212] kasan: test: avoid corrupting memory in copy_user_test [149/212] kasan: test: avoid corrupting memory in kasan_rcu_uaf [150/212] mm/page_alloc: always initialize memory map for the holes [151/212] microblaze: simplify pte_alloc_one_kernel() [152/212] mm: introduce 
memmap_alloc() to unify memory map allocation [153/212] memblock: stop poisoning raw allocations [154/212] mm/page_alloc.c: fix 'zone_id' may be used uninitialized in this function warning [155/212] mm/page_alloc: make alloc_node_mem_map() __init rather than __ref [156/212] mm/page_alloc.c: use in_task() [157/212] mm/page_isolation: tracing: trace all test_pages_isolated failures [158/212] mm/hwpoison: remove unneeded variable unmap_success [159/212] mm/hwpoison: fix potential pte_unmap_unlock pte error [160/212] mm/hwpoison: change argument struct page hpagep to hpage [161/212] mm/hwpoison: fix some obsolete comments [162/212] mm: hwpoison: don't drop slab caches for offlining non-LRU page [163/212] doc: hwpoison: correct the support for hugepage [164/212] mm: hwpoison: dump page for unhandlable page [165/212] mm: fix panic caused by __page_handle_poison() [166/212] hugetlb: simplify prep_compound_gigantic_page ref count racing code [167/212] hugetlb: drop ref count earlier after page allocation [168/212] hugetlb: before freeing hugetlb page set dtor to appropriate value [169/212] hugetlb: fix hugetlb cgroup refcounting during vma split [170/212] userfaultfd: change mmap_changing to atomic [171/212] userfaultfd: prevent concurrent API initialization [172/212] selftests/vm/userfaultfd: wake after copy failure [173/212] mm/numa: automatically generate node migration order [174/212] mm/migrate: update node demotion order on hotplug events [175/212] mm/migrate: enable returning precise migrate_pages() success count [176/212] mm/migrate: demote pages during reclaim [177/212] mm/vmscan: add page demotion counter [178/212] mm/vmscan: add helper for querying ability to age anonymous pages [179/212] mm/vmscan: Consider anonymous pages without swap [180/212] mm/vmscan: never demote for memcg reclaim [181/212] mm/migrate: add sysfs interface to enable reclaim migration [182/212] mm/vmpressure: replace vmpressure_to_css() with vmpressure_to_memcg() [183/212] mm/vmscan: remove 
the PageDirty check after MADV_FREE pages are page_ref_freezed [184/212] mm/vmscan: remove misleading setting to sc->priority [185/212] mm/vmscan: remove unneeded return value of kswapd_run() [186/212] mm/vmscan: add 'else' to remove check_pending label [187/212] mm, vmscan: guarantee drop_slab_node() termination [188/212] mm: compaction: optimize proactive compaction deferrals [189/212] mm: compaction: support triggering of proactive compaction by user [190/212] mm/mempolicy: use readable NUMA_NO_NODE macro instead of magic number [191/212] mm/mempolicy: add MPOL_PREFERRED_MANY for multiple preferred nodes [192/212] mm/memplicy: add page allocation function for MPOL_PREFERRED_MANY policy [193/212] mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY [194/212] mm/mempolicy: advertise new MPOL_PREFERRED_MANY [195/212] mm/mempolicy: unify the create() func for bind/interleave/prefer-many policies [196/212] mm/mempolicy.c: use in_task() in mempolicy_slab_node() [197/212] memblock: make memblock_find_in_range method private [198/212] mm: introduce process_mrelease system call [199/212] mm: wire up syscall process_mrelease [200/212] mm/migrate: correct kernel-doc notation [201/212] selftests: vm: add KSM merge test [202/212] selftests: vm: add KSM unmerge test [203/212] selftests: vm: add KSM zero page merging test [204/212] selftests: vm: add KSM merging across nodes test [205/212] mm: KSM: fix data type [206/212] selftests: vm: add KSM merging time test [207/212] selftests: vm: add COW time test for KSM pages [208/212] mm/percpu,c: remove obsolete comments of pcpu_chunk_populated() [209/212] mm/vmstat: correct some wrong comments [210/212] mm/vmstat: simplify the array size calculation [211/212] mm/vmstat: remove unneeded return value [212/212] mm/madvise: add MADV_WILLNEED to process_madvise()

Message ID

20210902215327.cFSNXjGv5%akpm@linux-foundation.org (mailing list archive)

State

New

Headers

DMARC-Filter: OpenDMARC Filter v1.4.1 mail.kernel.org BCD3361153
Date: Thu, 02 Sep 2021 14:53:27 -0700
From: Andrew Morton <akpm@linux-foundation.org>
To: akpm@linux-foundation.org, hannes@cmpxchg.org, jack@suse.cz,
 linux-mm@kvack.org, mm-commits@vger.kernel.org, shakeelb@google.com,
 tj@kernel.org, torvalds@linux-foundation.org
Subject: [patch 066/212] writeback: memcg: simplify
 cgroup_writeback_by_id
Message-ID: <20210902215327.cFSNXjGv5%akpm@linux-foundation.org>
In-Reply-To: <20210902144820.78957dff93d7bea620d55a89@linux-foundation.org>
User-Agent: s-nail v14.8.16
Sender: owner-linux-mm@kvack.org
Precedence: bulk

Series

[001/212] ia64: fix typo in a comment | expand

Commit Message

Andrew Morton Sept. 2, 2021, 9:53 p.m. UTC

From: Shakeel Butt <shakeelb@google.com>
Subject: writeback: memcg: simplify cgroup_writeback_by_id

Currently cgroup_writeback_by_id calls mem_cgroup_wb_stats() to get dirty
pages for a memcg.  However mem_cgroup_wb_stats() does a lot more than
just get the number of dirty pages.  Just directly get the number of dirty
pages instead of calling mem_cgroup_wb_stats().  Also,
cgroup_writeback_by_id() is only called for best-effort dirty flushing, so
remove the unused 'nr' parameter; for the same reason there is no need to
explicitly flush the memcg stats.

Link: https://lkml.kernel.org/r/20210722182627.2267368-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/fs-writeback.c          |   20 +++++++++-----------
 include/linux/memcontrol.h |   15 +++++++++++++++
 include/linux/writeback.h  |    2 +-
 mm/memcontrol.c            |   13 +------------
 4 files changed, 26 insertions(+), 24 deletions(-)

--- a/fs/fs-writeback.c~writeback-memcg-simplify-cgroup_writeback_by_id
+++ a/fs/fs-writeback.c
@@ -1039,20 +1039,20 @@  restart:
  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
  * @bdi_id: target bdi id
  * @memcg_id: target memcg css id
- * @nr: number of pages to write, 0 for best-effort dirty flushing
  * @reason: reason why some writeback work initiated
  * @done: target wb_completion
  *
  * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
  * with the specified parameters.
  */
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done)
 {
 	struct backing_dev_info *bdi;
 	struct cgroup_subsys_state *memcg_css;
 	struct bdi_writeback *wb;
 	struct wb_writeback_work *work;
+	unsigned long dirty;
 	int ret;
 
 	/* lookup bdi and memcg */
@@ -1081,24 +1081,22 @@  int cgroup_writeback_by_id(u64 bdi_id, i
 	}
 
 	/*
-	 * If @nr is zero, the caller is attempting to write out most of
+	 * The caller is attempting to write out most of
 	 * the currently dirty pages.  Let's take the current dirty page
 	 * count and inflate it by 25% which should be large enough to
 	 * flush out most dirty pages while avoiding getting livelocked by
 	 * concurrent dirtiers.
+	 *
+	 * BTW the memcg stats are flushed periodically and this is best-effort
+	 * estimation, so some potential error is ok.
 	 */
-	if (!nr) {
-		unsigned long filepages, headroom, dirty, writeback;
-
-		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
-				      &writeback);
-		nr = dirty * 10 / 8;
-	}
+	dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
+	dirty = dirty * 10 / 8;
 
 	/* issue the writeback work */
 	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
 	if (work) {
-		work->nr_pages = nr;
+		work->nr_pages = dirty;
 		work->sync_mode = WB_SYNC_NONE;
 		work->range_cyclic = 1;
 		work->reason = reason;
--- a/include/linux/memcontrol.h~writeback-memcg-simplify-cgroup_writeback_by_id
+++ a/include/linux/memcontrol.h
@@ -955,6 +955,16 @@  static inline void mod_memcg_state(struc
 	local_irq_restore(flags);
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	long x = READ_ONCE(memcg->vmstats.state[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
@@ -1391,6 +1401,11 @@  static inline void mod_memcg_state(struc
 {
 }
 
+static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	return 0;
+}
+
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
--- a/include/linux/writeback.h~writeback-memcg-simplify-cgroup_writeback_by_id
+++ a/include/linux/writeback.h
@@ -218,7 +218,7 @@  void wbc_attach_and_unlock_inode(struct
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 			      size_t bytes);
-int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
 			   enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
 bool cleanup_offline_cgwb(struct bdi_writeback *wb);
--- a/mm/memcontrol.c~writeback-memcg-simplify-cgroup_writeback_by_id
+++ a/mm/memcontrol.c
@@ -646,17 +646,6 @@  void __mod_memcg_state(struct mem_cgroup
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
-static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
-	long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
 	long x = 0;
@@ -4668,7 +4657,7 @@  void mem_cgroup_flush_foreign(struct bdi
 		    atomic_read(&frn->done.cnt) == 1) {
 			frn->at = 0;
 			trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
-			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
 					       WB_REASON_FOREIGN_FLUSH,
 					       &frn->done);
 		}

[066/212] writeback: memcg: simplify cgroup_writeback_by_id

Commit Message

Patch