diff mbox

[1/4] proc: add seq_put_decimal_ull_aligned to speed up /proc/pid/smaps

Message ID 20180129080043.14191-1-avagin@openvz.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andrey Vagin Jan. 29, 2018, 8 a.m. UTC
seq_put_decimal_ull_aligned(m, str, val, width) is equivalent to
seq_printf(m, "%s%*llu", str, width, val), but it is much faster.

== test_smaps.py
num = 0
with open("/proc/1/smaps") as f:
        for x in xrange(10000):
                data = f.read()
                f.seek(0, 0)
==

== Before patch ==
$ time python test_smaps.py

real    0m4.593s
user    0m0.398s
sys     0m4.158s

== After patch ==
$ time python test_smaps.py

real    0m3.828s
user    0m0.413s
sys     0m3.408s

$ perf record -g python test_smaps.py
== Before patch ==
-   79.01%     3.36%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 75.65% show_smap.isra.33
      + 48.85% seq_printf
      + 15.75% __walk_page_range
      + 9.70% show_map_vma.isra.23
        0.61% seq_puts

== After patch ==
-   75.51%     4.62%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 70.88% show_smap.isra.33
      + 24.82% seq_put_decimal_ull_aligned
      + 19.78% __walk_page_range
      + 12.74% seq_printf
      + 11.08% show_map_vma.isra.23
      + 1.68% seq_puts

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/meminfo.c        |  15 +-----
 fs/proc/task_mmu.c       | 124 +++++++++++++++++++----------------------------
 fs/seq_file.c            |  24 +++++++--
 include/linux/kernel.h   |   2 +-
 include/linux/seq_file.h |   2 +
 lib/vsprintf.c           |  18 +++++--
 6 files changed, 88 insertions(+), 97 deletions(-)

Comments

Alexey Dobriyan Jan. 29, 2018, 7:10 p.m. UTC | #1
On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> seq_printf(m, "%s%*d", str, width, val), but it works much faster.

It is called "minimum width", not alignment.
Andrey Vagin Jan. 29, 2018, 9:09 p.m. UTC | #2
On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> 
> It is called "minimum width", not alignment.

Yes, you are right. And it's used to align numbers in output files.

Maybe you could suggest a better name for this function?

Thanks,
Andrei
Alexey Dobriyan Jan. 31, 2018, 4:57 p.m. UTC | #3
On Mon, Jan 29, 2018 at 01:09:07PM -0800, Andrei Vagin wrote:
> On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> > On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> > 
> > It is called "minimum width", not alignment.
> 
> Yes, you are right. And it's used to align numbers in output files.
> 
> Maybe you could suggest a better name for this function?

I honestly don't know.
Contemplating printing integers backwards.
Andrew Morton Feb. 9, 2018, 12:18 a.m. UTC | #4
On Mon, 29 Jan 2018 13:09:07 -0800 Andrei Vagin <avagin@virtuozzo.com> wrote:

> On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> > On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> > 
> > It is called "minimum width", not alignment.
> 
> Yes, you are right. And it's used to align numbers in output files.
> 
> Maybe you could suggest a better name for this function?
> 

_width?  _min_width_?  _pad?
Andrey Vagin Feb. 12, 2018, 8:06 a.m. UTC | #5
On Thu, Feb 08, 2018 at 04:18:25PM -0800, Andrew Morton wrote:
> On Mon, 29 Jan 2018 13:09:07 -0800 Andrei Vagin <avagin@virtuozzo.com> wrote:
> 
> > On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> > > On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > > > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > > > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> > > 
> > > It is called "minimum width", not alignment.
> > 
> > Yes, you are right. And it's used to align numbers in output files.
> > 
> > Maybe you could suggest a better name for this function?
> > 
> 
> _width?  _min_width_?  _pad?

I chose the first one. Thank you. The second version of these patches
should already be in your mailbox.

Thanks,
Andrei
diff mbox

Patch

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6bb20f864259..2b197ce8b99f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -26,20 +26,7 @@  void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 
 static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 {
-	char v[32];
-	static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
-	int len;
-
-	len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
-
-	seq_write(m, s, 16);
-
-	if (len > 0) {
-		if (len < 8)
-			seq_write(m, blanks, 8 - len);
-
-		seq_write(m, v, len);
-	}
+	seq_put_decimal_ull_aligned(m, s, num << (PAGE_SHIFT - 10), 8);
 	seq_write(m, " kB\n", 4);
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3a08685ef27c..cc0aaf3a7315 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -24,6 +24,8 @@ 
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_aligned(m, str, (val) << (PAGE_SHIFT-10), 8)
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	unsigned long text, lib, swap, anon, file, shmem;
@@ -50,37 +52,28 @@  void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	seq_printf(m,
-		"VmPeak:\t%8lu kB\n"
-		"VmSize:\t%8lu kB\n"
-		"VmLck:\t%8lu kB\n"
-		"VmPin:\t%8lu kB\n"
-		"VmHWM:\t%8lu kB\n"
-		"VmRSS:\t%8lu kB\n"
-		"RssAnon:\t%8lu kB\n"
-		"RssFile:\t%8lu kB\n"
-		"RssShmem:\t%8lu kB\n"
-		"VmData:\t%8lu kB\n"
-		"VmStk:\t%8lu kB\n"
-		"VmExe:\t%8lu kB\n"
-		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n"
-		"VmSwap:\t%8lu kB\n",
-		hiwater_vm << (PAGE_SHIFT-10),
-		total_vm << (PAGE_SHIFT-10),
-		mm->locked_vm << (PAGE_SHIFT-10),
-		mm->pinned_vm << (PAGE_SHIFT-10),
-		hiwater_rss << (PAGE_SHIFT-10),
-		total_rss << (PAGE_SHIFT-10),
-		anon << (PAGE_SHIFT-10),
-		file << (PAGE_SHIFT-10),
-		shmem << (PAGE_SHIFT-10),
-		mm->data_vm << (PAGE_SHIFT-10),
-		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		mm_pgtables_bytes(mm) >> 10,
-		swap << (PAGE_SHIFT-10));
+	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+	seq_put_decimal_ull_aligned(m,
+		    " kB\nVmExe:\t", text, 8);
+	seq_put_decimal_ull_aligned(m,
+		    " kB\nVmLib:\t", lib, 8);
+	seq_put_decimal_ull_aligned(m,
+		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
 }
+#undef SEQ_PUT_DEC
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
@@ -734,6 +727,8 @@  void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
 {
 }
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_aligned(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
@@ -807,51 +802,33 @@  static int show_smap(struct seq_file *m, void *v, int is_pid)
 		ret = SEQ_SKIP;
 	}
 
-	if (!rollup_mode)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "KernelPageSize: %8lu kB\n"
-			   "MMUPageSize:    %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   vma_kernel_pagesize(vma) >> 10,
-			   vma_mmu_pagesize(vma) >> 10);
-
-
-	if (!rollup_mode || last_vma)
-		seq_printf(m,
-			   "Rss:            %8lu kB\n"
-			   "Pss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n"
-			   "Anonymous:      %8lu kB\n"
-			   "LazyFree:       %8lu kB\n"
-			   "AnonHugePages:  %8lu kB\n"
-			   "ShmemPmdMapped: %8lu kB\n"
-			   "Shared_Hugetlb: %8lu kB\n"
-			   "Private_Hugetlb: %7lu kB\n"
-			   "Swap:           %8lu kB\n"
-			   "SwapPss:        %8lu kB\n"
-			   "Locked:         %8lu kB\n",
-			   mss->resident >> 10,
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
-			   mss->shared_clean  >> 10,
-			   mss->shared_dirty  >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10,
-			   mss->anonymous >> 10,
-			   mss->lazyfree >> 10,
-			   mss->anonymous_thp >> 10,
-			   mss->shmem_thp >> 10,
-			   mss->shared_hugetlb >> 10,
-			   mss->private_hugetlb >> 10,
-			   mss->swap >> 10,
-			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+	if (!rollup_mode) {
+		SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+		SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+		SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+		seq_puts(m, " kB\n");
+	}
 
+	if (!rollup_mode || last_vma) {
+		SEQ_PUT_DEC("Rss:            ", mss->resident);
+		SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+		SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+		SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+		SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+		SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+		SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+		SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+		SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+		SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+		SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+		seq_put_decimal_ull_aligned(m,
+			     " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7);
+		SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+		SEQ_PUT_DEC(" kB\nSwapPss:        ", mss->swap_pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+		seq_puts(m, " kB\n");
+	}
 	if (!rollup_mode) {
 		arch_show_smap(m, vma);
 		show_smap_vma_flags(m, vma);
@@ -859,6 +836,7 @@  static int show_smap(struct seq_file *m, void *v, int is_pid)
 	m_cache_vma(m, vma);
 	return ret;
 }
+#undef SEQ_PUT_DEC
 
 static int show_pid_smap(struct seq_file *m, void *v)
 {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index d2885220ecf7..55c89eb95a90 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -677,8 +677,8 @@  EXPORT_SYMBOL(seq_puts);
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-			 unsigned long long num)
+void seq_put_decimal_ull_aligned(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, int width)
 {
 	int len;
 
@@ -692,15 +692,23 @@  void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 	memcpy(m->buf + m->count, delimiter, len);
 	m->count += len;
 
-	if (m->count + 1 >= m->size)
+	if (!width)
+		width = 1;
+
+	if (m->count + width >= m->size)
 		goto overflow;
 
 	if (num < 10) {
+		int i;
+
+		for (i = 0; i < width - 1; i++)
+			m->buf[m->count++] = ' ';
+
 		m->buf[m->count++] = num + '0';
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
 	if (!len)
 		goto overflow;
 
@@ -710,6 +718,12 @@  void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 overflow:
 	seq_set_overflow(m);
 }
+
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
+			 unsigned long long num)
+{
+	return seq_put_decimal_ull_aligned(m, delimiter, num, 0);
+}
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
 /**
@@ -784,7 +798,7 @@  void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
 	if (!len)
 		goto overflow;
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ce51455e2adf..6485a32db7d5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -439,7 +439,7 @@  extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
 
-extern int num_to_str(char *buf, int size, unsigned long long num);
+extern int num_to_str(char *buf, int size, unsigned long long num, int width);
 
 /* lib/printf utilities */
 
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 53f238934d7f..521de9e41227 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,6 +118,8 @@  __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull_aligned(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num);
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 01c3957b2de6..b8015e165a54 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -337,7 +337,7 @@  char *put_dec(char *buf, unsigned long long n)
  *
  * If speed is not important, use snprintf(). It's easy to read the code.
  */
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, int width)
 {
 	/* put_dec requires 2-byte alignment of the buffer. */
 	char tmp[sizeof(num) * 3] __aligned(2);
@@ -351,11 +351,21 @@  int num_to_str(char *buf, int size, unsigned long long num)
 		len = put_dec(tmp, num) - tmp;
 	}
 
-	if (len > size)
+	if (len > size || width > size)
 		return 0;
+
+	if (width > len) {
+		width = width - len;
+		for (idx = 0; idx < width; idx++)
+			buf[idx] = ' ';
+	} else {
+		width = 0;
+	}
+
 	for (idx = 0; idx < len; ++idx)
-		buf[idx] = tmp[len - idx - 1];
-	return len;
+		buf[idx + width] = tmp[len - idx - 1];
+
+	return len + width;
 }
 
 #define SIGN	1		/* unsigned/signed, must be 1 */