@@ -269,15 +269,13 @@ void __inode_attach_wb(struct inode *inode, struct folio *folio)
if (inode_cgwb_enabled(inode)) {
struct cgroup_subsys_state *memcg_css;
- if (folio) {
- memcg_css = mem_cgroup_css_from_folio(folio);
- wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
- } else {
- /* must pin memcg_css, see wb_get_create() */
+ /* must pin memcg_css, see wb_get_create() */
+ if (folio)
+ memcg_css = get_mem_cgroup_css_from_folio(folio);
+ else
memcg_css = task_get_css(current, memory_cgrp_id);
- wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
- css_put(memcg_css);
- }
+ wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ css_put(memcg_css);
}
if (!wb)
@@ -929,16 +927,16 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
if (!wbc->wb || wbc->no_cgroup_owner)
return;
- css = mem_cgroup_css_from_folio(folio);
+ css = get_mem_cgroup_css_from_folio(folio);
/* dead cgroups shouldn't contribute to inode ownership arbitration */
if (!(css->flags & CSS_ONLINE))
- return;
+ goto out;
id = css->id;
if (id == wbc->wb_id) {
wbc->wb_bytes += bytes;
- return;
+ goto out;
}
if (id == wbc->wb_lcand_id)
@@ -951,6 +949,8 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
wbc->wb_tcand_bytes += bytes;
else
wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
+out:
+ css_put(css);
}
EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
@@ -874,7 +874,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
return match;
}
-struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio);
+struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio);
ino_t page_cgroup_ino(struct page *page);
static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
@@ -1594,9 +1594,14 @@ static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
if (mem_cgroup_disabled())
return;
+ if (!folio_memcg_charged(folio))
+ return;
+
+ rcu_read_lock();
memcg = folio_memcg(folio);
- if (unlikely(memcg && &memcg->css != wb->memcg_css))
+ if (unlikely(&memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
+ rcu_read_unlock();
}
void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
@@ -266,7 +266,10 @@ TRACE_EVENT(track_foreign_dirty,
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
+
+ rcu_read_lock();
__entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
+ rcu_read_unlock();
),
TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
@@ -229,7 +229,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
EXPORT_SYMBOL(memcg_bpf_enabled_key);
/**
- * mem_cgroup_css_from_folio - css of the memcg associated with a folio
+ * get_mem_cgroup_css_from_folio - get a reference to the css of a folio's memcg
* @folio: folio of interest
*
* If memcg is bound to the default hierarchy, css of the memcg associated
@@ -239,14 +239,16 @@ EXPORT_SYMBOL(memcg_bpf_enabled_key);
* If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
* is returned.
*/
-struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
+struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio)
{
- struct mem_cgroup *memcg = folio_memcg(folio);
+ struct mem_cgroup *memcg;
- if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
- memcg = root_mem_cgroup;
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return &root_mem_cgroup->css;
- return &memcg->css;
+ memcg = get_mem_cgroup_from_folio(folio);
+
+ return memcg ? &memcg->css : &root_mem_cgroup->css;
}
/**
In the near future, a folio will no longer pin its corresponding memory cgroup. Once that happens, the memory cgroup returned by folio_memcg() may be released at any time, so callers must either hold the RCU read lock or acquire a reference to it for as long as they use it. This patch prepares the writeback code for that change: mem_cgroup_css_from_folio() is renamed to get_mem_cgroup_css_from_folio() and now returns a css with a reference held, which callers drop with css_put() when they are done, and the remaining direct users of folio_memcg() are wrapped in rcu_read_lock()/rcu_read_unlock(). This serves as a preparatory measure for the reparenting of the LRU pages. Signed-off-by: Muchun Song <songmuchun@bytedance.com> --- fs/fs-writeback.c | 22 +++++++++++----------- include/linux/memcontrol.h | 9 +++++++-- include/trace/events/writeback.h | 3 +++ mm/memcontrol.c | 14 ++++++++------ 4 files changed, 29 insertions(+), 19 deletions(-)