Patchwork mm: memcontrol: provide shmem statistics

login
register
mail settings
Submitter Johannes Weiner
Date Feb. 21, 2017, 4:43 p.m.
Message ID <20170221164343.32252-1-hannes@cmpxchg.org>
Download mbox | patch
Permalink /patch/9585155/
State New
Headers show

Comments

Johannes Weiner - Feb. 21, 2017, 4:43 p.m.
Cgroups currently don't report how much shmem they use, which can be
useful data to have, in particular since shmem is included in the
cache/file item while being reclaimed like anonymous memory.

Add a counter to track shmem pages during charging and uncharging.

Reported-by: Chris Down <cdown@fb.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 Documentation/cgroup-v2.txt |  5 +++++
 include/linux/memcontrol.h  |  1 +
 mm/memcontrol.c             | 28 ++++++++++++++++++++--------
 3 files changed, 26 insertions(+), 8 deletions(-)
Balbir Singh - Feb. 22, 2017, 4:12 a.m.
On Tue, Feb 21, 2017 at 11:43:43AM -0500, Johannes Weiner wrote:
> Cgroups currently don't report how much shmem they use, which can be
> useful data to have, in particular since shmem is included in the
> cache/file item while being reclaimed like anonymous memory.
> 
> Add a counter to track shmem pages during charging and uncharging.
> 
> Reported-by: Chris Down <cdown@fb.com>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---

Makes sense

Acked-by: Balbir Singh <bsingharora@gmail.com>
Michal Hocko - Feb. 22, 2017, 8:12 a.m.
On Tue 21-02-17 11:43:43, Johannes Weiner wrote:
> Cgroups currently don't report how much shmem they use, which can be
> useful data to have, in particular since shmem is included in the
> cache/file item while being reclaimed like anonymous memory.
> 
> Add a counter to track shmem pages during charging and uncharging.

Yes this is indeed useful. Accounting shmem to the page cache was a
mistake because this is more than confusing. Sad we cannot fix that.
I have just one concern with this patch. You are relying on the
PageSwapBacked check but it looks like we are going to implement
MADV_FREE by dropping this flag. I know we do not support MADV_FREE
on shared mappings but if we ever do then the accounting will become
subtly broken. Can/Should we rely on a shmem_mapping() check instead?

Other than that the patch looks good to me.

> Reported-by: Chris Down <cdown@fb.com>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>  Documentation/cgroup-v2.txt |  5 +++++
>  include/linux/memcontrol.h  |  1 +
>  mm/memcontrol.c             | 28 ++++++++++++++++++++--------
>  3 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
> index 4cc07ce3b8dd..d99389ce7b01 100644
> --- a/Documentation/cgroup-v2.txt
> +++ b/Documentation/cgroup-v2.txt
> @@ -867,6 +867,11 @@ PAGE_SIZE multiple when read back.
>  
>  		Amount of memory used in network transmission buffers
>  
> +	  shmem
> +
> +		Amount of cached filesystem data that is swap-backed,
> +		such as tmpfs, shm segments, shared anonymous mmap()s
> +
>  	  file_mapped
>  
>  		Amount of cached filesystem data mapped with mmap()
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 61d20c17f3b7..47bdf727d1ad 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -46,6 +46,7 @@ enum mem_cgroup_stat_index {
>  	MEM_CGROUP_STAT_CACHE,		/* # of pages charged as cache */
>  	MEM_CGROUP_STAT_RSS,		/* # of pages charged as anon rss */
>  	MEM_CGROUP_STAT_RSS_HUGE,	/* # of pages charged as anon huge */
> +	MEM_CGROUP_STAT_SHMEM,		/* # of pages charged as shmem */
>  	MEM_CGROUP_STAT_FILE_MAPPED,	/* # of pages charged as file rss */
>  	MEM_CGROUP_STAT_DIRTY,          /* # of dirty pages in page cache */
>  	MEM_CGROUP_STAT_WRITEBACK,	/* # of pages under writeback */
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9c9cde768d40..49409f5c0238 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -102,6 +102,7 @@ static const char * const mem_cgroup_stat_names[] = {
>  	"cache",
>  	"rss",
>  	"rss_huge",
> +	"shmem",
>  	"mapped_file",
>  	"dirty",
>  	"writeback",
> @@ -601,9 +602,13 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
>  	if (PageAnon(page))
>  		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
>  				nr_pages);
> -	else
> +	else {
>  		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
>  				nr_pages);
> +		if (PageSwapBacked(page))
> +			__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SHMEM],
> +				       nr_pages);
> +	}
>  
>  	if (compound) {
>  		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
> @@ -5200,6 +5205,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
>  	seq_printf(m, "sock %llu\n",
>  		   (u64)stat[MEMCG_SOCK] * PAGE_SIZE);
>  
> +	seq_printf(m, "shmem %llu\n",
> +		   (u64)stat[MEM_CGROUP_STAT_SHMEM] * PAGE_SIZE);
>  	seq_printf(m, "file_mapped %llu\n",
>  		   (u64)stat[MEM_CGROUP_STAT_FILE_MAPPED] * PAGE_SIZE);
>  	seq_printf(m, "file_dirty %llu\n",
> @@ -5468,8 +5475,8 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
>  
>  static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
>  			   unsigned long nr_anon, unsigned long nr_file,
> -			   unsigned long nr_huge, unsigned long nr_kmem,
> -			   struct page *dummy_page)
> +			   unsigned long nr_kmem, unsigned long nr_huge,
> +			   unsigned long nr_shmem, struct page *dummy_page)
>  {
>  	unsigned long nr_pages = nr_anon + nr_file + nr_kmem;
>  	unsigned long flags;
> @@ -5487,6 +5494,7 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
>  	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
>  	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
>  	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
> +	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_SHMEM], nr_shmem);
>  	__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
>  	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
>  	memcg_check_events(memcg, dummy_page);
> @@ -5499,6 +5507,7 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
>  static void uncharge_list(struct list_head *page_list)
>  {
>  	struct mem_cgroup *memcg = NULL;
> +	unsigned long nr_shmem = 0;
>  	unsigned long nr_anon = 0;
>  	unsigned long nr_file = 0;
>  	unsigned long nr_huge = 0;
> @@ -5531,9 +5540,9 @@ static void uncharge_list(struct list_head *page_list)
>  		if (memcg != page->mem_cgroup) {
>  			if (memcg) {
>  				uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> -					       nr_huge, nr_kmem, page);
> -				pgpgout = nr_anon = nr_file =
> -					nr_huge = nr_kmem = 0;
> +					       nr_kmem, nr_huge, nr_shmem, page);
> +				pgpgout = nr_anon = nr_file = nr_kmem = 0;
> +				nr_huge = nr_shmem = 0;
>  			}
>  			memcg = page->mem_cgroup;
>  		}
> @@ -5547,8 +5556,11 @@ static void uncharge_list(struct list_head *page_list)
>  			}
>  			if (PageAnon(page))
>  				nr_anon += nr_pages;
> -			else
> +			else {
>  				nr_file += nr_pages;
> +				if (PageSwapBacked(page))
> +					nr_shmem += nr_pages;
> +			}
>  			pgpgout++;
>  		} else {
>  			nr_kmem += 1 << compound_order(page);
> @@ -5560,7 +5572,7 @@ static void uncharge_list(struct list_head *page_list)
>  
>  	if (memcg)
>  		uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
> -			       nr_huge, nr_kmem, page);
> +			       nr_kmem, nr_huge, nr_shmem, page);
>  }
>  
>  /**
> -- 
> 2.11.1
Johannes Weiner - Feb. 22, 2017, 12:45 p.m.
On Wed, Feb 22, 2017 at 09:12:31AM +0100, Michal Hocko wrote:
> On Tue 21-02-17 11:43:43, Johannes Weiner wrote:
> > Cgroups currently don't report how much shmem they use, which can be
> > useful data to have, in particular since shmem is included in the
> > cache/file item while being reclaimed like anonymous memory.
> > 
> > Add a counter to track shmem pages during charging and uncharging.
> 
> Yes this is indeed useful. Accounting shmem to the page cache was a
> mistake because this is more than confusing. Sad we cannot fix that.

Agreed, this continues to cause confusion with many Linux users :(

> I would have just one concern with this patch. You are relying on
> PageSwapBacked check but it looks like we are going to implement
> MADV_FREE by dropping this flag. I know we do not support MADV_FREE
> on shared mappings but if we ever do then the accounting will become
> subtly broken. Can/Should we rely on shmem_mapping() check instead?

Yes, right now we do MADV_FREE only on private pages, so this patch is
safe with Shaohua's changes to how we use PG_swapbacked.

Should we support MADV_FREE on shared mappings in the future, using
shmem_mapping() for memcg accounting won't work unfortunately, because
shared pages are truncated from the page cache before uncharging, and
that clears the page->mapping pointer. However, in that case we could
probably unaccount the pages from shmem at the time of MADV_FREE, when
we clear the PG_swapbacked bit.

> Other than that the patch looks good to me.

Thanks!
Michal Hocko - Feb. 22, 2017, 1:24 p.m.
On Wed 22-02-17 07:45:01, Johannes Weiner wrote:
> On Wed, Feb 22, 2017 at 09:12:31AM +0100, Michal Hocko wrote:
> > On Tue 21-02-17 11:43:43, Johannes Weiner wrote:
> > > Cgroups currently don't report how much shmem they use, which can be
> > > useful data to have, in particular since shmem is included in the
> > > cache/file item while being reclaimed like anonymous memory.
> > > 
> > > Add a counter to track shmem pages during charging and uncharging.
> > 
> > Yes this is indeed useful. Accounting shmem to the page cache was a
> > mistake because this is more than confusing. Sad we cannot fix that.
> 
> Agreed, this continues to cause confusion with many Linux users :(
> 
> > I would have just one concern with this patch. You are relying on
> > PageSwapBacked check but it looks like we are going to implement
> > MADV_FREE by dropping this flag. I know we do not support MADV_FREE
> > on shared mappings but if we ever do then the accounting will become
> > subtly broken. Can/Should we rely on shmem_mapping() check instead?
> 
> Yes, right now we do MADV_FREE only on private pages, so this patch is
> safe with Shaohua's changes to how we use PG_swapbacked.
> 
> Should we support MADV_FREE on shared mappings in the future, using
> shmem_mapping() for memcg accounting won't work unfortunately, because
> shared pages are truncated from the page cache before uncharging, and
> that clears the page->mapping pointer.

You are right!

> However, in that case we could
> probably unaccount the pages from shmem at the time of MADV_FREE, when
> we clear the PG_swapbacked bit.

Or we can just keep the code as is and add a comment to
madvise_free_single_vma as a reminder that memcg charging would have to
be handled properly if we want to drop the vma_is_anonymous check there.
It is really hard to tell whether we will ever get support for MADV_FREE
on shared pages.

> > Other than that the patch looks good to me.
> 
> Thanks!

Patch

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 4cc07ce3b8dd..d99389ce7b01 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -867,6 +867,11 @@  PAGE_SIZE multiple when read back.
 
 		Amount of memory used in network transmission buffers
 
+	  shmem
+
+		Amount of cached filesystem data that is swap-backed,
+		such as tmpfs, shm segments, shared anonymous mmap()s
+
 	  file_mapped
 
 		Amount of cached filesystem data mapped with mmap()
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 61d20c17f3b7..47bdf727d1ad 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -46,6 +46,7 @@  enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_CACHE,		/* # of pages charged as cache */
 	MEM_CGROUP_STAT_RSS,		/* # of pages charged as anon rss */
 	MEM_CGROUP_STAT_RSS_HUGE,	/* # of pages charged as anon huge */
+	MEM_CGROUP_STAT_SHMEM,		/* # of pages charged as shmem */
 	MEM_CGROUP_STAT_FILE_MAPPED,	/* # of pages charged as file rss */
 	MEM_CGROUP_STAT_DIRTY,          /* # of dirty pages in page cache */
 	MEM_CGROUP_STAT_WRITEBACK,	/* # of pages under writeback */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9c9cde768d40..49409f5c0238 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -102,6 +102,7 @@  static const char * const mem_cgroup_stat_names[] = {
 	"cache",
 	"rss",
 	"rss_huge",
+	"shmem",
 	"mapped_file",
 	"dirty",
 	"writeback",
@@ -601,9 +602,13 @@  static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 	if (PageAnon(page))
 		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
 				nr_pages);
-	else
+	else {
 		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
 				nr_pages);
+		if (PageSwapBacked(page))
+			__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SHMEM],
+				       nr_pages);
+	}
 
 	if (compound) {
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
@@ -5200,6 +5205,8 @@  static int memory_stat_show(struct seq_file *m, void *v)
 	seq_printf(m, "sock %llu\n",
 		   (u64)stat[MEMCG_SOCK] * PAGE_SIZE);
 
+	seq_printf(m, "shmem %llu\n",
+		   (u64)stat[MEM_CGROUP_STAT_SHMEM] * PAGE_SIZE);
 	seq_printf(m, "file_mapped %llu\n",
 		   (u64)stat[MEM_CGROUP_STAT_FILE_MAPPED] * PAGE_SIZE);
 	seq_printf(m, "file_dirty %llu\n",
@@ -5468,8 +5475,8 @@  void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
 
 static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 			   unsigned long nr_anon, unsigned long nr_file,
-			   unsigned long nr_huge, unsigned long nr_kmem,
-			   struct page *dummy_page)
+			   unsigned long nr_kmem, unsigned long nr_huge,
+			   unsigned long nr_shmem, struct page *dummy_page)
 {
 	unsigned long nr_pages = nr_anon + nr_file + nr_kmem;
 	unsigned long flags;
@@ -5487,6 +5494,7 @@  static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_SHMEM], nr_shmem);
 	__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
 	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
 	memcg_check_events(memcg, dummy_page);
@@ -5499,6 +5507,7 @@  static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 static void uncharge_list(struct list_head *page_list)
 {
 	struct mem_cgroup *memcg = NULL;
+	unsigned long nr_shmem = 0;
 	unsigned long nr_anon = 0;
 	unsigned long nr_file = 0;
 	unsigned long nr_huge = 0;
@@ -5531,9 +5540,9 @@  static void uncharge_list(struct list_head *page_list)
 		if (memcg != page->mem_cgroup) {
 			if (memcg) {
 				uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
-					       nr_huge, nr_kmem, page);
-				pgpgout = nr_anon = nr_file =
-					nr_huge = nr_kmem = 0;
+					       nr_kmem, nr_huge, nr_shmem, page);
+				pgpgout = nr_anon = nr_file = nr_kmem = 0;
+				nr_huge = nr_shmem = 0;
 			}
 			memcg = page->mem_cgroup;
 		}
@@ -5547,8 +5556,11 @@  static void uncharge_list(struct list_head *page_list)
 			}
 			if (PageAnon(page))
 				nr_anon += nr_pages;
-			else
+			else {
 				nr_file += nr_pages;
+				if (PageSwapBacked(page))
+					nr_shmem += nr_pages;
+			}
 			pgpgout++;
 		} else {
 			nr_kmem += 1 << compound_order(page);
@@ -5560,7 +5572,7 @@  static void uncharge_list(struct list_head *page_list)
 
 	if (memcg)
 		uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
-			       nr_huge, nr_kmem, page);
+			       nr_kmem, nr_huge, nr_shmem, page);
 }
 
 /**