diff mbox series

redir: Use memfd_create instead of pipe

Message ID ZhuLCZfwRfflgv0v@gondor.apana.org.au (mailing list archive)
State Superseded
Delegated to: Herbert Xu
Headers show
Series redir: Use memfd_create instead of pipe | expand

Commit Message

Herbert Xu April 14, 2024, 7:51 a.m. UTC
Use memfd_create(2) instead of pipe(2).  With pipe(2), a fork
is required if the amount of data to be written exceeds the pipe
size.  This is not the case with memfd_create.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 configure.ac |  2 +-
 src/eval.c   |  3 +--
 src/redir.c  | 57 ++++++++++++++++++++++++++++++++++------------------
 src/redir.h  |  1 +
 src/system.h |  7 +++++++
 5 files changed, 47 insertions(+), 23 deletions(-)

Comments

Jilles Tjoelker April 19, 2024, 9:24 p.m. UTC | #1
On Sun, Apr 14, 2024 at 03:51:37PM +0800, Herbert Xu wrote:
> Use memfd_create(2) instead of pipe(2).  With pipe(2), a fork
> is required if the amount of data to be written exceeds the pipe
> size.  This is not the case with memfd_create.

Since a memfd does not behave identically to a pipe, this should be
tested carefully. A memfd does not behave identically to a regular file
either. It may affect programs started from the shell that read
here-documents.

Using pipe or memfd conditionally based on the length of the
here-document and whether memfd_create(2) fails transiently might cause
even more obscure issues.

I suggest using either a pipe for all here-documents or a memfd for all
here-documents. The shell should fall back from memfd_create(2) to
pipe(2) only if memfd_create(2) is not supported or fails for a
persistent reason like [ENOSYS] or [EPERM]; in this case, the shell
should remember the failure and immediately use pipe(2) for subsequent
here-documents.

The dup/close dance with the memfd will look silly in syscall traces,
although it is functionally fine.
Herbert Xu April 20, 2024, 12:15 a.m. UTC | #2
On Fri, Apr 19, 2024 at 11:24:46PM +0200, Jilles Tjoelker wrote:
>
> Using pipe or memfd conditionally based on the length of the
> here-document and whether memfd_create(2) fails transiently might cause
> even more obscure issues.

The reason is that memfd ends up being a lot slower than pipe.
Of course it's still way faster than fork + pipe, but until it
catches up with pipe, we're better off using both.

Cheers,
diff mbox series

Patch

diff --git a/configure.ac b/configure.ac
index 6993364..cb55c3f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -87,7 +87,7 @@  AC_CHECK_DECL([PRIdMAX],,
 
 dnl Checks for library functions.
 AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \
-	       mempcpy \
+	       memfd_create mempcpy \
 	       sigsetmask stpcpy strchrnul strsignal strtod strtoimax \
 	       strtoumax sysconf)
 
diff --git a/src/eval.c b/src/eval.c
index 978a174..fce5314 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -635,8 +635,7 @@  evalbackcmd(union node *n, struct backcmd *result)
 		goto out;
 	}
 
-	if (pipe(pip) < 0)
-		sh_error("Pipe call failed");
+	sh_pipe(pip, 0);
 	jp = makejob(1);
 	if (forkshell(jp, n, FORK_NOJOB) == 0) {
 		FORCEINTON;
diff --git a/src/redir.c b/src/redir.c
index bcc81b4..12233be 100644
--- a/src/redir.c
+++ b/src/redir.c
@@ -32,6 +32,7 @@ 
  * SUCH DAMAGE.
  */
 
+#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/param.h>	/* PIPE_BUF */
@@ -280,6 +281,17 @@  ecreate:
 	sh_open_fail(fname, O_CREAT, EEXIST);
 }
 
+static int sh_dup2(int ofd, int nfd)
+{
+	if (nfd < 0)
+		nfd = dup(ofd);
+	else
+		nfd = dup2(ofd, nfd);
+	if (nfd < 0)
+		sh_error("%d: %s", ofd, strerror(errno));
+
+	return nfd;
+}
 
 #ifdef notyet
 static void dupredirect(union node *redir, int f, char memory[10])
@@ -288,7 +300,6 @@  static void dupredirect(union node *redir, int f)
 #endif
 {
 	int fd = redir->nfile.fd;
-	int err = 0;
 
 #ifdef notyet
 	memory[fd] = 0;
@@ -301,26 +312,31 @@  static void dupredirect(union node *redir, int f)
 				memory[fd] = 1;
 			else
 #endif
-				if (dup2(f, fd) < 0) {
-					err = errno;
-					goto err;
-				}
+				sh_dup2(f, fd);
 			return;
 		}
 		f = fd;
-	} else if (dup2(f, fd) < 0)
-		err = errno;
+	} else
+		sh_dup2(f, fd);
 
 	close(f);
-	if (err < 0)
-		goto err;
-
-	return;
-
-err:
-	sh_error("%d: %s", f, strerror(err));
 }
 
+int sh_pipe(int pip[2], int memfd)
+{
+	if (memfd) {
+		pip[0] = memfd_create("dash", 0);
+		if (pip[0] >= 0) {
+			pip[1] = sh_dup2(pip[0], -1);
+			return 1;
+		}
+	}
+
+	if (pipe(pip) < 0)
+		sh_error("Pipe call failed");
+
+	return 0;
+}
 
 /*
  * Handle here documents.  Normally we fork off a process to write the
@@ -331,9 +347,10 @@  err:
 STATIC int
 openhere(union node *redir)
 {
-	char *p;
-	int pip[2];
 	size_t len = 0;
+	int pip[2];
+	int memfd;
+	char *p;
 
 	p = redir->nhere.doc->narg.text;
 	if (redir->type == NXHERE) {
@@ -341,12 +358,12 @@  openhere(union node *redir)
 		p = stackblock();
 	}
 
-	if (pipe(pip) < 0)
-		sh_error("Pipe call failed");
-
 	len = strlen(p);
-	if (len <= PIPESIZE) {
+	memfd = sh_pipe(pip, len > PIPESIZE);
+
+	if (memfd || len <= PIPESIZE) {
 		xwrite(pip[1], p, len);
+		lseek(pip[1], 0, SEEK_SET);
 		goto out;
 	}
 
diff --git a/src/redir.h b/src/redir.h
index 16f5c20..0be5f1a 100644
--- a/src/redir.h
+++ b/src/redir.h
@@ -50,4 +50,5 @@  int redirectsafe(union node *, int);
 void unwindredir(struct redirtab *stop);
 struct redirtab *pushredir(union node *redir);
 int sh_open(const char *pathname, int flags, int mayfail);
+int sh_pipe(int pip[2], int memfd);
 
diff --git a/src/system.h b/src/system.h
index 007952c..371c64b 100644
--- a/src/system.h
+++ b/src/system.h
@@ -54,6 +54,13 @@  static inline void sigclearmask(void)
 #endif
 }
 
+#ifndef HAVE_MEMFD_CREATE
+static inline int memfd_create(const char *name, unsigned int flags)
+{
+	return -1;
+}
+#endif
+
 #ifndef HAVE_MEMPCPY
 void *mempcpy(void *, const void *, size_t);
 #endif