@@ -80,6 +80,9 @@ There are four components to pagemap:
memory cgroup each page is charged to, indexed by PFN. Only available when
CONFIG_MEMCG is set.
+ For an offline (removed) cgroup this returns the inode number of the closest
+ online ancestor. Write the 64-bit flag value 1 into the opened file to get
+ the real owners instead.
+
Short descriptions to the page flags
====================================
@@ -248,6 +248,7 @@ static const struct file_operations proc_kpageflags_operations = {
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ unsigned long flags = (unsigned long)file->private_data;
u64 __user *out = (u64 __user *)buf;
struct page *ppage;
unsigned long src = *ppos;
@@ -267,7 +268,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
ppage = NULL;
if (ppage)
- ino = page_cgroup_ino(ppage);
+ ino = page_cgroup_ino(ppage, !(flags & 1));
else
ino = 0;
@@ -289,9 +290,28 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
return ret;
}
+/*
+ * kpagecgroup_write - select the per-open reporting mode of kpagecgroup
+ * @file:  opened kpagecgroup file; the mode is stored in ->private_data
+ * @buf:   user buffer holding exactly one 64-bit flags word
+ * @count: number of bytes written, must equal sizeof(u64)
+ * @ppos:  file position (unused)
+ *
+ * Only bit 0 is defined. When it is set, kpagecgroup_read() asks
+ * page_cgroup_ino() for the cgroup a page is actually charged to rather
+ * than its closest online ancestor. Any other bit is rejected.
+ *
+ * Returns @count on success, -EINVAL if the size is wrong or an unknown
+ * flag bit is set, -EFAULT if the user buffer cannot be read.
+ */
+static ssize_t kpagecgroup_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	u64 flags;
+
+	if (count != sizeof(flags))
+		return -EINVAL;
+
+	/*
+	 * Copy the whole 64-bit word. get_user() on a char pointer would
+	 * size the access from the pointee type and fetch a single byte,
+	 * leaving the remaining seven bytes unvalidated.
+	 */
+	if (copy_from_user(&flags, buf, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags > 1)
+		return -EINVAL;
+
+	file->private_data = (void *)(unsigned long)flags;
+	return count;
+}
+
static const struct file_operations proc_kpagecgroup_operations = {
.llseek = mem_lseek,
.read = kpagecgroup_read,
+ .write = kpagecgroup_write, /* takes one 64-bit flags word; stored per open file in ->private_data */
};
#endif /* CONFIG_MEMCG */
@@ -300,7 +320,7 @@ static int __init proc_page_init(void)
proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
#ifdef CONFIG_MEMCG
- proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations);
+ proc_create("kpagecgroup", 0600, NULL, &proc_kpagecgroup_operations);
#endif
return 0;
}
@@ -444,7 +444,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
}
struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
-ino_t page_cgroup_ino(struct page *page);
+ino_t page_cgroup_ino(struct page *page, bool online);
static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
@@ -333,6 +333,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
/**
* page_cgroup_ino - return inode number of the memcg a page is charged to
* @page: the page
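+ * @online: if true, return the closest online ancestor; if false, return the
+ *          memcg the page is actually charged to, even if it is offline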
*
* Look up the closest online ancestor of the memory cgroup @page is charged to
* and return its inode number or 0 if @page is not charged to any cgroup. It
@@ -343,14 +344,14 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
* after page_cgroup_ino() returns, so it only should be used by callers that
* do not care (such as procfs interfaces).
*/
-ino_t page_cgroup_ino(struct page *page)
+ino_t page_cgroup_ino(struct page *page, bool online)
{
struct mem_cgroup *memcg;
unsigned long ino = 0;
rcu_read_lock();
memcg = READ_ONCE(page->mem_cgroup);
- while (memcg && !(memcg->css.flags & CSS_ONLINE))
+ while (memcg && online && !(memcg->css.flags & CSS_ONLINE))
memcg = parent_mem_cgroup(memcg);
if (memcg)
ino = cgroup_ino(memcg->css.cgroup);
@@ -139,7 +139,7 @@ static int hwpoison_filter_task(struct page *p)
if (!hwpoison_filter_memcg)
return 0;
- if (page_cgroup_ino(p) != hwpoison_filter_memcg)
+ if (page_cgroup_ino(p, true) != hwpoison_filter_memcg)
return -EINVAL;
return 0;
By default this interface reports the inode number of the closest online
ancestor if the cgroup is offline (removed). Information about the real owner
is required for detecting which pages keep a removed cgroup alive.

This patch adds a per-file mode which is changed by writing 64-bit flags into
the opened /proc/kpagecgroup. For now only the first bit is used.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 Documentation/admin-guide/mm/pagemap.rst |  3 +++
 fs/proc/page.c                           | 24 ++++++++++++++++++++++--
 include/linux/memcontrol.h               |  2 +-
 mm/memcontrol.c                          |  5 +++--
 mm/memory-failure.c                      |  2 +-
 5 files changed, 30 insertions(+), 6 deletions(-)