diff mbox series

proc: give /proc/cmdline size

Message ID YxoywlbM73JJN3r+@localhost.localdomain (mailing list archive)
State New, archived
Headers show
Series proc: give /proc/cmdline size | expand

Commit Message

Alexey Dobriyan Sept. 8, 2022, 6:21 p.m. UTC
Most /proc files don't have length (in fstat sense). This leads
to inefficiencies when reading such files with APIs commonly found in
modern programming languages. They open file, then fstat descriptor,
get st_size == 0 and either assume file is empty or start reading
without knowing target size.

cat(1) does OK because it uses large enough buffer by default.
But naive programs copy-pasted from SO aren't:

	let mut f = std::fs::File::open("/proc/cmdline").unwrap();
	let mut buf: Vec<u8> = Vec::new();
	f.read_to_end(&mut buf).unwrap();

will result in

	openat(AT_FDCWD, "/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
	statx(0, NULL, AT_STATX_SYNC_AS_STAT, STATX_ALL, NULL) = -1 EFAULT (Bad address)
	statx(3, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_ALL, {stx_mask=STATX_BASIC_STATS|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFREG|0444, stx_size=0, ...}) = 0
	lseek(3, 0, SEEK_CUR)                   = 0
	read(3, "BOOT_IMAGE=(hd3,gpt2)/vmlinuz-5.", 32) = 32
	read(3, "19.6-100.fc35.x86_64 root=/dev/m", 32) = 32
	read(3, "apper/fedora_localhost--live-roo"..., 64) = 64
	read(3, "ocalhost--live-swap rd.lvm.lv=fe"..., 128) = 116
	read(3, "", 12)

open/stat is OK, lseek looks silly but there are 3 unnecessary reads
because Rust starts with 32 bytes per Vec<u8> and grows from there.

In case of /proc/cmdline, the length is known precisely.

Make variables readonly while I'm at it.

P.S.: I tried to scp /proc/cpuinfo today and got empty file
	but this is separate story.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 fs/proc/cmdline.c    |    6 +++++-
 include/linux/init.h |    1 +
 init/main.c          |    7 +++++--
 3 files changed, 11 insertions(+), 3 deletions(-)

Comments

Andrew Morton Sept. 8, 2022, 8:45 p.m. UTC | #1
On Thu, 8 Sep 2022 21:21:54 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:

> Most /proc files don't have length (in fstat sense). This leads
> to inefficiencies when reading such files with APIs commonly found in
> modern programming languages. They open file, then fstat descriptor,
> get st_size == 0 and either assume file is empty or start reading
> without knowing target size.
> 
> cat(1) does OK because it uses large enough buffer by default.
> But naive programs copy-pasted from SO aren't:

What is "SO"?

> 	let mut f = std::fs::File::open("/proc/cmdline").unwrap();
> 	let mut buf: Vec<u8> = Vec::new();
> 	f.read_to_end(&mut buf).unwrap();
> 
> will result in
> 
> 	openat(AT_FDCWD, "/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
> 	statx(0, NULL, AT_STATX_SYNC_AS_STAT, STATX_ALL, NULL) = -1 EFAULT (Bad address)
> 	statx(3, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_ALL, {stx_mask=STATX_BASIC_STATS|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFREG|0444, stx_size=0, ...}) = 0
> 	lseek(3, 0, SEEK_CUR)                   = 0
> 	read(3, "BOOT_IMAGE=(hd3,gpt2)/vmlinuz-5.", 32) = 32
> 	read(3, "19.6-100.fc35.x86_64 root=/dev/m", 32) = 32
> 	read(3, "apper/fedora_localhost--live-roo"..., 64) = 64
> 	read(3, "ocalhost--live-swap rd.lvm.lv=fe"..., 128) = 116
> 	read(3, "", 12)
> 
> open/stat is OK, lseek looks silly but there are 3 unnecessary reads
> because Rust starts with 32 bytes per Vec<u8> and grows from there.
> 
> In case of /proc/cmdline, the length is known precisely.
> 
> Make variables readonly while I'm at it.

It seems arbitrary.  Why does /proc/cmdline in particular get this
treatment?
Alexey Dobriyan Sept. 9, 2022, 5:07 a.m. UTC | #2
On Thu, Sep 08, 2022 at 01:45:46PM -0700, Andrew Morton wrote:
> On Thu, 8 Sep 2022 21:21:54 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:
> 
> > Most /proc files don't have length (in fstat sense). This leads
> > to inefficiencies when reading such files with APIs commonly found in
> > modern programming languages. They open file, then fstat descriptor,
> > get st_size == 0 and either assume file is empty or start reading
> > without knowing target size.
> > 
> > cat(1) does OK because it uses large enough buffer by default.
> > But naive programs copy-pasted from SO aren't:
> 
> What is "SO"?

StackOverflow, the source of all best programs in the world!

> > 	let mut f = std::fs::File::open("/proc/cmdline").unwrap();
> > 	let mut buf: Vec<u8> = Vec::new();
> > 	f.read_to_end(&mut buf).unwrap();
> > 
> > will result in
> > 
> > 	openat(AT_FDCWD, "/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
> > 	statx(0, NULL, AT_STATX_SYNC_AS_STAT, STATX_ALL, NULL) = -1 EFAULT (Bad address)
> > 	statx(3, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_ALL, {stx_mask=STATX_BASIC_STATS|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFREG|0444, stx_size=0, ...}) = 0
> > 	lseek(3, 0, SEEK_CUR)                   = 0
> > 	read(3, "BOOT_IMAGE=(hd3,gpt2)/vmlinuz-5.", 32) = 32
> > 	read(3, "19.6-100.fc35.x86_64 root=/dev/m", 32) = 32
> > 	read(3, "apper/fedora_localhost--live-roo"..., 64) = 64
> > 	read(3, "ocalhost--live-swap rd.lvm.lv=fe"..., 128) = 116
> > 	read(3, "", 12)
> > 
> > open/stat is OK, lseek looks silly but there are 3 unnecessary reads
> > because Rust starts with 32 bytes per Vec<u8> and grows from there.
> > 
> > In case of /proc/cmdline, the length is known precisely.
> > 
> > Make variables readonly while I'm at it.
> 
> It seems arbitrary.  Why does /proc/cmdline in particular get this
> treatment?

We can calculate its length precisely and show to userspace so why not
do it. Other /proc files are trickier.
David Laight Sept. 9, 2022, 7:37 a.m. UTC | #3
From: Alexey Dobriyan
> Sent: 09 September 2022 06:07
> 
> On Thu, Sep 08, 2022 at 01:45:46PM -0700, Andrew Morton wrote:
> > On Thu, 8 Sep 2022 21:21:54 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:
> >
> > > Most /proc files don't have length (in fstat sense). This leads
> > > to inefficiencies when reading such files with APIs commonly found in
> > > modern programming languages. They open file, then fstat descriptor,
> > > get st_size == 0 and either assume file is empty or start reading
> > > without knowing target size.
> > >
> > > cat(1) does OK because it uses large enough buffer by default.
> > > But naive programs copy-pasted from SO aren't:
> >
> > What is "SO"?
> 
> StackOverflow, the source of all best programs in the world!

Anyone directly copying anything from there gets what
they deserve.

Useful for hints, but they are so often wrong.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -3,6 +3,7 @@ 
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include "internal.h"
 
 static int cmdline_proc_show(struct seq_file *m, void *v)
 {
@@ -13,7 +14,10 @@  static int cmdline_proc_show(struct seq_file *m, void *v)
 
 static int __init proc_cmdline_init(void)
 {
-	proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
+	struct proc_dir_entry *pde;
+
+	pde = proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
+	pde->size = saved_command_line_len + 1;
 	return 0;
 }
 fs_initcall(proc_cmdline_init);
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -143,6 +143,7 @@  struct file_system_type;
 extern int do_one_initcall(initcall_t fn);
 extern char __initdata boot_command_line[];
 extern char *saved_command_line;
+extern unsigned int saved_command_line_len;
 extern unsigned int reset_devices;
 
 /* used by init/main.c */
--- a/init/main.c
+++ b/init/main.c
@@ -143,7 +143,8 @@  void (*__initdata late_time_init)(void);
 /* Untouched command line saved by arch-specific code. */
 char __initdata boot_command_line[COMMAND_LINE_SIZE];
 /* Untouched saved command line (eg. for /proc) */
-char *saved_command_line;
+char *saved_command_line __ro_after_init;
+unsigned int saved_command_line_len __ro_after_init;
 /* Command line for parameter parsing */
 static char *static_command_line;
 /* Untouched extra command line */
@@ -665,6 +666,8 @@  static void __init setup_command_line(char *command_line)
 			strcpy(saved_command_line + len, extra_init_args);
 		}
 	}
+
+	saved_command_line_len = strlen(saved_command_line);
 }
 
 /*
@@ -1372,7 +1375,7 @@  static void __init do_initcall_level(int level, char *command_line)
 static void __init do_initcalls(void)
 {
 	int level;
-	size_t len = strlen(saved_command_line) + 1;
+	size_t len = saved_command_line_len + 1;
 	char *command_line;
 
 	command_line = kzalloc(len, GFP_KERNEL);