diff mbox series

[v2,1/2] seq_file: Optimize seq_puts()

Message ID a8589bffe4830dafcb9111e22acf06603fea7132.1713781332.git.christophe.jaillet@wanadoo.fr (mailing list archive)
State New
Headers show
Series [v2,1/2] seq_file: Optimize seq_puts() | expand

Commit Message

Christophe JAILLET April 22, 2024, 10:24 a.m. UTC
Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_putc() or seq_write() can then be used
instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
   $ git grep seq_puts.*\" | wc -l
   3436

   $ git grep seq_puts.*\".\" | wc -l
   84

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Changes in v2:
   - Use a function, instead of a macro   [Al Viro]
   - Handle the case of 1 char only strings, in order to use seq_putc()   [Al Viro]
   - Use __always_inline   [David Laight]

V1: https://lore.kernel.org/all/5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@wanadoo.fr/


Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

 .L2918:
@@ -46072,10 +46073,11 @@
 	call	clk_prepare_unlock	#
 # drivers/clk/clk.c:3438: 	clk_pm_runtime_put_all();
 	call	clk_pm_runtime_put_all	#
-# drivers/clk/clk.c:3440: 	seq_puts(s, "}\n");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	movl	$2, %edx	#,
 	movq	$.LC94, %rsi	#,
 	movq	%rbp, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
 # drivers/clk/clk.c:3441: 	return 0;
 	jmp	.L2917	#
 	.size	clk_dump_show, .-clk_dump_show
@@ -46200,7 +46202,7 @@
 	leaq	128(%rbx), %r15	#, _97
 	movq	%r15, %rdi	# _97,
--
 # drivers/clk/clk.c:1987: 		__clk_recalc_rates(core, false, 0);
@@ -46480,15 +46482,17 @@
 	call	seq_printf	#
 	jmp	.L2950	#
 .L2946:
-# drivers/clk/clk.c:3315: 		seq_puts(s, "-----");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
 	call	__sanitizer_cov_trace_pc	#
+	movl	$5, %edx	#,
 	movq	$.LC100, %rsi	#,
 	movq	%rbp, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
+# ./include/linux/seq_file.h:131: }
 	jmp	.L2947	#
 .L2957:
 # drivers/clk/clk.c:1903: 	return clk_core_get_accuracy_no_lock(core);
-	xorl	%r14d, %r14d	# _34
+	xorl	%r14d, %r14d	# _35
--
@@ -46736,21 +46740,22 @@
 	call	__sanitizer_cov_trace_pc	#
 	leaq	240(%r12), %rdi	#, tmp101
 	call	__tsan_read8	#
-# drivers/clk/clk.c:3355: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	movq	$.LC104, %rsi	#,
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	movl	$142, %edx	#,
 	movq	%r12, %rdi	# s,
+	movq	$.LC104, %rsi	#,
 # drivers/clk/clk.c:3352: 	struct hlist_head **lists = s->private;
 	movq	240(%r12), %r13	# s_10(D)->private, lists
-# drivers/clk/clk.c:3355: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	call	seq_puts	#
-# drivers/clk/clk.c:3356: 	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle    enable   consumer                         id\n");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	call	seq_write	#
+	movl	$142, %edx	#,
 	movq	$.LC105, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
-# drivers/clk/clk.c:3357: 	seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+	call	seq_write	#
 	movq	$.LC106, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
+	movl	$142, %edx	#,
+	call	seq_write	#
 # drivers/clk/clk.c:3359: 	ret = clk_pm_runtime_get_all();
 	call	clk_pm_runtime_get_all	#
 # drivers/clk/clk.c:3360: 	if (ret)
@@ -57943,7 +57948,7 @@
 	subq	$88, %rsp	#,
 # drivers/clk/clk.c:5338: {


The output for seq_putc() generation has also be checked and works.
---
 fs/seq_file.c            |  4 ++--
 include/linux/seq_file.h | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

Comments

Christian Brauner May 2, 2024, 2:31 p.m. UTC | #1
On Mon, 22 Apr 2024 12:24:06 +0200, Christophe JAILLET wrote:
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_putc() or seq_write() can then be used
> instead.
> 
> This saves a few cycles.
> 
> [...]

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/2] seq_file: Optimize seq_puts()
      https://git.kernel.org/vfs/vfs/c/45751097aeb3
[2/2] seq_file: Simplify __seq_puts()
      https://git.kernel.org/vfs/vfs/c/e035af9f6eba
diff mbox series

Patch

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@  void seq_putc(struct seq_file *m, char c)
 }
 EXPORT_SYMBOL(seq_putc);
 
-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
 {
 	int len = strlen(s);
 
@@ -680,7 +680,7 @@  void seq_puts(struct seq_file *m, const char *s)
 	memcpy(m->buf + m->count, s, len);
 	m->count += len;
 }
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);
 
 /**
  * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..8bd4fda6e027 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,18 @@  void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
 __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+
+static __always_inline void seq_puts(struct seq_file *m, const char *s)
+{
+	if (!__builtin_constant_p(*s))
+		__seq_puts(m, s);
+	else if (s[0] && !s[1])
+		seq_putc(m, s[0]);
+	else
+		seq_write(m, s, __builtin_strlen(s));
+}
+
 void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
 			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,