diff mbox series

proc/meminfo: add MemKernel counter

Message ID 155853600919.381.8172097084053782598.stgit@buzz (mailing list archive)
State New, archived
Headers show
Series proc/meminfo: add MemKernel counter | expand

Commit Message

Konstantin Khlebnikov May 22, 2019, 2:40 p.m. UTC
Some kinds of kernel allocations are not accounted for or not shown in meminfo.
For example vmalloc allocations are tracked but overall size is not shown
for performance reasons. There is no information about network buffers.

In most cases detailed statistics are not required. First of all, we need
information about overall kernel memory usage regardless of its structure.

This patch estimates kernel memory usage by subtracting known sizes of
free, anonymous, hugetlb and caches from total memory size: MemKernel =
MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 Documentation/filesystems/proc.txt |    5 +++++
 fs/proc/meminfo.c                  |   20 +++++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

Comments

Vlastimil Babka May 22, 2019, 3:01 p.m. UTC | #1
On 5/22/19 4:40 PM, Konstantin Khlebnikov wrote:
> Some kinds of kernel allocations are not accounted or not show in meminfo.
> For example vmalloc allocations are tracked but overall size is not shown

I think Roman's vmalloc patch [1] is on its way?

> for performance reasons. There is no information about network buffers.

xfs buffers can also occupy a lot, from my experience

> In most cases detailed statistics is not required. At first place we need
> information about overall kernel memory usage regardless of its structure.
> 
> This patch estimates kernel memory usage by subtracting known sizes of
> free, anonymous, hugetlb and caches from total memory size: MemKernel =
> MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

I've tried this once in [2]. The name was Unaccounted and one of the objections
was that people would get worried. Yours is a bit better, perhaps MemKernMisc
would be even more descriptive? Michal Hocko worried about maintainability, that
we forget something, but I don't think that's a big issue.

Vlastimil

[1] https://lore.kernel.org/linux-mm/20190514235111.2817276-2-guro@fb.com/T/#u
[2] https://lore.kernel.org/linux-mm/20161020121149.9935-1-vbabka@suse.cz/T/#u

> ---
>  Documentation/filesystems/proc.txt |    5 +++++
>  fs/proc/meminfo.c                  |   20 +++++++++++++++-----
>  2 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
> index 66cad5c86171..a0ab7f273ea0 100644
> --- a/Documentation/filesystems/proc.txt
> +++ b/Documentation/filesystems/proc.txt
> @@ -860,6 +860,7 @@ varies by architecture and compile options.  The following is from a
>  
>  MemTotal:     16344972 kB
>  MemFree:      13634064 kB
> +MemKernel:      862600 kB
>  MemAvailable: 14836172 kB
>  Buffers:          3656 kB
>  Cached:        1195708 kB
> @@ -908,6 +909,10 @@ MemAvailable: An estimate of how much memory is available for starting new
>                page cache to function well, and that not all reclaimable
>                slab will be reclaimable, due to items being in use. The
>                impact of those factors will vary from system to system.
> +   MemKernel: The sum of all kinds of kernel memory allocations: Slab,
> +              Vmalloc, Percpu, KernelStack, PageTables, socket buffers,
> +              and some other untracked allocations. Does not include
> +              MemFree, Buffers, Cached, SwapCached, AnonPages, Hugetlb.
>       Buffers: Relatively temporary storage for raw disk blocks
>                shouldn't get tremendously large (20MB or so)
>        Cached: in-memory cache for files read from the disk (the
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 568d90e17c17..b27d56dd619a 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -39,17 +39,27 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>  	long available;
>  	unsigned long pages[NR_LRU_LISTS];
>  	unsigned long sreclaimable, sunreclaim;
> +	unsigned long anon_pages, file_pages, swap_cached;
> +	long kernel_pages;
>  	int lru;
>  
>  	si_meminfo(&i);
>  	si_swapinfo(&i);
>  	committed = percpu_counter_read_positive(&vm_committed_as);
>  
> -	cached = global_node_page_state(NR_FILE_PAGES) -
> -			total_swapcache_pages() - i.bufferram;
> +	anon_pages = global_node_page_state(NR_ANON_MAPPED);
> +	file_pages = global_node_page_state(NR_FILE_PAGES);
> +	swap_cached = total_swapcache_pages();
> +
> +	cached = file_pages - swap_cached - i.bufferram;
>  	if (cached < 0)
>  		cached = 0;
>  
> +	kernel_pages = i.totalram - i.freeram - anon_pages - file_pages -
> +		       hugetlb_total_pages();
> +	if (kernel_pages < 0)
> +		kernel_pages = 0;
> +
>  	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
>  		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
>  
> @@ -60,9 +70,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>  	show_val_kb(m, "MemTotal:       ", i.totalram);
>  	show_val_kb(m, "MemFree:        ", i.freeram);
>  	show_val_kb(m, "MemAvailable:   ", available);
> +	show_val_kb(m, "MemKernel:      ", kernel_pages);
>  	show_val_kb(m, "Buffers:        ", i.bufferram);
>  	show_val_kb(m, "Cached:         ", cached);
> -	show_val_kb(m, "SwapCached:     ", total_swapcache_pages());
> +	show_val_kb(m, "SwapCached:     ", swap_cached);
>  	show_val_kb(m, "Active:         ", pages[LRU_ACTIVE_ANON] +
>  					   pages[LRU_ACTIVE_FILE]);
>  	show_val_kb(m, "Inactive:       ", pages[LRU_INACTIVE_ANON] +
> @@ -92,8 +103,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>  		    global_node_page_state(NR_FILE_DIRTY));
>  	show_val_kb(m, "Writeback:      ",
>  		    global_node_page_state(NR_WRITEBACK));
> -	show_val_kb(m, "AnonPages:      ",
> -		    global_node_page_state(NR_ANON_MAPPED));
> +	show_val_kb(m, "AnonPages:      ", anon_pages);
>  	show_val_kb(m, "Mapped:         ",
>  		    global_node_page_state(NR_FILE_MAPPED));
>  	show_val_kb(m, "Shmem:          ", i.sharedram);
>
Konstantin Khlebnikov May 22, 2019, 3:30 p.m. UTC | #2
On 22.05.2019 18:01, Vlastimil Babka wrote:
> On 5/22/19 4:40 PM, Konstantin Khlebnikov wrote:
>> Some kinds of kernel allocations are not accounted or not show in meminfo.
>> For example vmalloc allocations are tracked but overall size is not shown
> 
> I think Roman's vmalloc patch [1] is on its way?
> 
>> for performance reasons. There is no information about network buffers.
> 
> xfs buffers can also occupy a lot, from my experience
> 
>> In most cases detailed statistics is not required. At first place we need
>> information about overall kernel memory usage regardless of its structure.
>>
>> This patch estimates kernel memory usage by subtracting known sizes of
>> free, anonymous, hugetlb and caches from total memory size: MemKernel =
>> MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.
>>
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> 
> I've tried this once in [2]. The name was Unaccounted and one of the objections
> was that people would get worried. Yours is a bit better, perhaps MemKernMisc
> would be even more descriptive? Michal Hocko worried about maintainability, that
> we forget something, but I don't think that's a big issue.

I've started with Misc/Unaccounted too
https://lore.kernel.org/lkml/155792098821.1536.17069603544573830315.stgit@buzz/

But this version simply shows all kernel memory.

> 
> Vlastimil
> 
> [1] https://lore.kernel.org/linux-mm/20190514235111.2817276-2-guro@fb.com/T/#u
> [2] https://lore.kernel.org/linux-mm/20161020121149.9935-1-vbabka@suse.cz/T/#u
> 
>> ---
>>   Documentation/filesystems/proc.txt |    5 +++++
>>   fs/proc/meminfo.c                  |   20 +++++++++++++++-----
>>   2 files changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
>> index 66cad5c86171..a0ab7f273ea0 100644
>> --- a/Documentation/filesystems/proc.txt
>> +++ b/Documentation/filesystems/proc.txt
>> @@ -860,6 +860,7 @@ varies by architecture and compile options.  The following is from a
>>   
>>   MemTotal:     16344972 kB
>>   MemFree:      13634064 kB
>> +MemKernel:      862600 kB
>>   MemAvailable: 14836172 kB
>>   Buffers:          3656 kB
>>   Cached:        1195708 kB
>> @@ -908,6 +909,10 @@ MemAvailable: An estimate of how much memory is available for starting new
>>                 page cache to function well, and that not all reclaimable
>>                 slab will be reclaimable, due to items being in use. The
>>                 impact of those factors will vary from system to system.
>> +   MemKernel: The sum of all kinds of kernel memory allocations: Slab,
>> +              Vmalloc, Percpu, KernelStack, PageTables, socket buffers,
>> +              and some other untracked allocations. Does not include
>> +              MemFree, Buffers, Cached, SwapCached, AnonPages, Hugetlb.
>>        Buffers: Relatively temporary storage for raw disk blocks
>>                 shouldn't get tremendously large (20MB or so)
>>         Cached: in-memory cache for files read from the disk (the
>> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
>> index 568d90e17c17..b27d56dd619a 100644
>> --- a/fs/proc/meminfo.c
>> +++ b/fs/proc/meminfo.c
>> @@ -39,17 +39,27 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>>   	long available;
>>   	unsigned long pages[NR_LRU_LISTS];
>>   	unsigned long sreclaimable, sunreclaim;
>> +	unsigned long anon_pages, file_pages, swap_cached;
>> +	long kernel_pages;
>>   	int lru;
>>   
>>   	si_meminfo(&i);
>>   	si_swapinfo(&i);
>>   	committed = percpu_counter_read_positive(&vm_committed_as);
>>   
>> -	cached = global_node_page_state(NR_FILE_PAGES) -
>> -			total_swapcache_pages() - i.bufferram;
>> +	anon_pages = global_node_page_state(NR_ANON_MAPPED);
>> +	file_pages = global_node_page_state(NR_FILE_PAGES);
>> +	swap_cached = total_swapcache_pages();
>> +
>> +	cached = file_pages - swap_cached - i.bufferram;
>>   	if (cached < 0)
>>   		cached = 0;
>>   
>> +	kernel_pages = i.totalram - i.freeram - anon_pages - file_pages -
>> +		       hugetlb_total_pages();
>> +	if (kernel_pages < 0)
>> +		kernel_pages = 0;
>> +
>>   	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
>>   		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
>>   
>> @@ -60,9 +70,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>>   	show_val_kb(m, "MemTotal:       ", i.totalram);
>>   	show_val_kb(m, "MemFree:        ", i.freeram);
>>   	show_val_kb(m, "MemAvailable:   ", available);
>> +	show_val_kb(m, "MemKernel:      ", kernel_pages);
>>   	show_val_kb(m, "Buffers:        ", i.bufferram);
>>   	show_val_kb(m, "Cached:         ", cached);
>> -	show_val_kb(m, "SwapCached:     ", total_swapcache_pages());
>> +	show_val_kb(m, "SwapCached:     ", swap_cached);
>>   	show_val_kb(m, "Active:         ", pages[LRU_ACTIVE_ANON] +
>>   					   pages[LRU_ACTIVE_FILE]);
>>   	show_val_kb(m, "Inactive:       ", pages[LRU_INACTIVE_ANON] +
>> @@ -92,8 +103,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>>   		    global_node_page_state(NR_FILE_DIRTY));
>>   	show_val_kb(m, "Writeback:      ",
>>   		    global_node_page_state(NR_WRITEBACK));
>> -	show_val_kb(m, "AnonPages:      ",
>> -		    global_node_page_state(NR_ANON_MAPPED));
>> +	show_val_kb(m, "AnonPages:      ", anon_pages);
>>   	show_val_kb(m, "Mapped:         ",
>>   		    global_node_page_state(NR_FILE_MAPPED));
>>   	show_val_kb(m, "Shmem:          ", i.sharedram);
>>
>
Michal Hocko May 22, 2019, 3:52 p.m. UTC | #3
On Wed 22-05-19 17:40:09, Konstantin Khlebnikov wrote:
> Some kinds of kernel allocations are not accounted or not show in meminfo.
> For example vmalloc allocations are tracked but overall size is not shown
> for performance reasons. There is no information about network buffers.
> 
> In most cases detailed statistics is not required. At first place we need
> information about overall kernel memory usage regardless of its structure.
> 
> This patch estimates kernel memory usage by subtracting known sizes of
> free, anonymous, hugetlb and caches from total memory size: MemKernel =
> MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.

Why do we need to export something that can be trivially calculated in
userspace? Also, is this number really meaningful? Say you have a driver
that exports memory to userspace via mmap but that memory is not
accounted. Is this really kernel memory?
Konstantin Khlebnikov May 22, 2019, 4:09 p.m. UTC | #4
On 22.05.2019 18:52, Michal Hocko wrote:
> On Wed 22-05-19 17:40:09, Konstantin Khlebnikov wrote:
>> Some kinds of kernel allocations are not accounted or not show in meminfo.
>> For example vmalloc allocations are tracked but overall size is not shown
>> for performance reasons. There is no information about network buffers.
>>
>> In most cases detailed statistics is not required. At first place we need
>> information about overall kernel memory usage regardless of its structure.
>>
>> This patch estimates kernel memory usage by subtracting known sizes of
>> free, anonymous, hugetlb and caches from total memory size: MemKernel =
>> MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.
> 
> Why do we need to export something that can be calculated in the
> userspace trivially? Also is this really something the number really
> meaningful? Say you have a driver that exports memory to the userspace
> via mmap but that memory is not accounted. Is this really a kernel
> memory?
> 

It may be trivial right now, but the definition is not fixed.
Adding new kinds of memory may change this definition.
For example, hypothetical 'GPU buffers' may be handled as 'userspace' memory.
Roman Gushchin May 22, 2019, 5:03 p.m. UTC | #5
On Wed, May 22, 2019 at 07:09:22PM +0300, Konstantin Khlebnikov wrote:
> On 22.05.2019 18:52, Michal Hocko wrote:
> > On Wed 22-05-19 17:40:09, Konstantin Khlebnikov wrote:
> > > Some kinds of kernel allocations are not accounted or not show in meminfo.
> > > For example vmalloc allocations are tracked but overall size is not shown
> > > for performance reasons. There is no information about network buffers.
> > > 
> > > In most cases detailed statistics is not required. At first place we need
> > > information about overall kernel memory usage regardless of its structure.
> > > 
> > > This patch estimates kernel memory usage by subtracting known sizes of
> > > free, anonymous, hugetlb and caches from total memory size: MemKernel =
> > > MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.
> > 
> > Why do we need to export something that can be calculated in the
> > userspace trivially? Also is this really something the number really
> > meaningful? Say you have a driver that exports memory to the userspace
> > via mmap but that memory is not accounted. Is this really a kernel
> > memory?
> > 
> 
> It may be trivial right now but not fixed.
> Adding new kinds of memory may change this definition.

Right, and it's what causes me to agree with Michal here, and leave it
to the userspace calculation.

The real meaning of the counter is the size of the "gray zone",
basically the memory which we have no clue about.

If we add accounting for some new type of memory which is currently in this
gray zone (say, xfs buffers), we should probably exclude it too.
And this means that the definition of this counter will change.

So IMO the definition is way too implementation-defined to be a part
of procfs API.
Konstantin Khlebnikov May 22, 2019, 6:18 p.m. UTC | #6
On Wed, May 22, 2019 at 8:04 PM Roman Gushchin <guro@fb.com> wrote:
>
> On Wed, May 22, 2019 at 07:09:22PM +0300, Konstantin Khlebnikov wrote:
> > On 22.05.2019 18:52, Michal Hocko wrote:
> > > On Wed 22-05-19 17:40:09, Konstantin Khlebnikov wrote:
> > > > Some kinds of kernel allocations are not accounted or not show in meminfo.
> > > > For example vmalloc allocations are tracked but overall size is not shown
> > > > for performance reasons. There is no information about network buffers.
> > > >
> > > > In most cases detailed statistics is not required. At first place we need
> > > > information about overall kernel memory usage regardless of its structure.
> > > >
> > > > This patch estimates kernel memory usage by subtracting known sizes of
> > > > free, anonymous, hugetlb and caches from total memory size: MemKernel =
> > > > MemTotal - MemFree - Buffers - Cached - SwapCached - AnonPages - Hugetlb.
> > >
> > > Why do we need to export something that can be calculated in the
> > > userspace trivially? Also is this really something the number really
> > > meaningful? Say you have a driver that exports memory to the userspace
> > > via mmap but that memory is not accounted. Is this really a kernel
> > > memory?
> > >
> >
> > It may be trivial right now but not fixed.
> > Adding new kinds of memory may change this definition.
>
> Right, and it's what causes me to agree with Michal here, and leave it
> to the userspace calculation.
>
> The real meaning of the counter is the size of the "gray zone",
> basically the memory which we have no clue about.

Well, all kernel memory is a gray zone for normal programmers.
They have direct control only over anon and file-cache.

I want to introduce a simple metric for 'system' memory usage.
It is much the same as the separation of CPU time into user and system.

> If we'll add accounting of some new type of memory, which now in this
> gray zone (say, xfs buffers), we probably should exclude it too.
> And this means that definition of this counter will change.

I'm not very familiar with xfs internals; I have never dug into them.
I've excluded buffers because they are simply the file cache for block devices.
Filesystems use it as a cache for metadata, but userspace has direct access to it.

>
> So IMO the definition is way too implementation-defined to be a part
> of procfs API.
>

Ok. User/kernel memory separation could be redefined in a more
abstract manner, depending on how the data is accessed.
diff mbox series

Patch

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 66cad5c86171..a0ab7f273ea0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -860,6 +860,7 @@  varies by architecture and compile options.  The following is from a
 
 MemTotal:     16344972 kB
 MemFree:      13634064 kB
+MemKernel:      862600 kB
 MemAvailable: 14836172 kB
 Buffers:          3656 kB
 Cached:        1195708 kB
@@ -908,6 +909,10 @@  MemAvailable: An estimate of how much memory is available for starting new
               page cache to function well, and that not all reclaimable
               slab will be reclaimable, due to items being in use. The
               impact of those factors will vary from system to system.
+   MemKernel: The sum of all kinds of kernel memory allocations: Slab,
+              Vmalloc, Percpu, KernelStack, PageTables, socket buffers,
+              and some other untracked allocations. Does not include
+              MemFree, Buffers, Cached, SwapCached, AnonPages, Hugetlb.
      Buffers: Relatively temporary storage for raw disk blocks
               shouldn't get tremendously large (20MB or so)
       Cached: in-memory cache for files read from the disk (the
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 568d90e17c17..b27d56dd619a 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -39,17 +39,27 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 	long available;
 	unsigned long pages[NR_LRU_LISTS];
 	unsigned long sreclaimable, sunreclaim;
+	unsigned long anon_pages, file_pages, swap_cached;
+	long kernel_pages;
 	int lru;
 
 	si_meminfo(&i);
 	si_swapinfo(&i);
 	committed = percpu_counter_read_positive(&vm_committed_as);
 
-	cached = global_node_page_state(NR_FILE_PAGES) -
-			total_swapcache_pages() - i.bufferram;
+	anon_pages = global_node_page_state(NR_ANON_MAPPED);
+	file_pages = global_node_page_state(NR_FILE_PAGES);
+	swap_cached = total_swapcache_pages();
+
+	cached = file_pages - swap_cached - i.bufferram;
 	if (cached < 0)
 		cached = 0;
 
+	kernel_pages = i.totalram - i.freeram - anon_pages - file_pages -
+		       hugetlb_total_pages();
+	if (kernel_pages < 0)
+		kernel_pages = 0;
+
 	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
 		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
@@ -60,9 +70,10 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "MemTotal:       ", i.totalram);
 	show_val_kb(m, "MemFree:        ", i.freeram);
 	show_val_kb(m, "MemAvailable:   ", available);
+	show_val_kb(m, "MemKernel:      ", kernel_pages);
 	show_val_kb(m, "Buffers:        ", i.bufferram);
 	show_val_kb(m, "Cached:         ", cached);
-	show_val_kb(m, "SwapCached:     ", total_swapcache_pages());
+	show_val_kb(m, "SwapCached:     ", swap_cached);
 	show_val_kb(m, "Active:         ", pages[LRU_ACTIVE_ANON] +
 					   pages[LRU_ACTIVE_FILE]);
 	show_val_kb(m, "Inactive:       ", pages[LRU_INACTIVE_ANON] +
@@ -92,8 +103,7 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_node_page_state(NR_FILE_DIRTY));
 	show_val_kb(m, "Writeback:      ",
 		    global_node_page_state(NR_WRITEBACK));
-	show_val_kb(m, "AnonPages:      ",
-		    global_node_page_state(NR_ANON_MAPPED));
+	show_val_kb(m, "AnonPages:      ", anon_pages);
 	show_val_kb(m, "Mapped:         ",
 		    global_node_page_state(NR_FILE_MAPPED));
 	show_val_kb(m, "Shmem:          ", i.sharedram);