@@ -44,6 +44,8 @@ static bool terminate;
static void drain_output(bool flush);
+static int startup_pipe[2];
+
static int num_processors(void)
{
long nproc = sysconf(_SC_NPROCESSORS_CONF);
@@ -81,13 +83,37 @@ static void child_start(struct child_data *child, const char *program)
exit(EXIT_FAILURE);
}
+ /*
+ * Duplicate the read side of the startup pipe to
+ * FD 3 so we can close everything else.
+ */
+ ret = dup2(startup_pipe[0], 3);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
/*
* Very dumb mechanism to clean open FDs other than
* stdio. We don't want O_CLOEXEC for the pipes...
*/
- for (i = 3; i < 8192; i++)
+ for (i = 4; i < 8192; i++)
close(i);
+ /*
+ * Read from the startup pipe; there should be no data,
+ * so we block until the write side is closed. If the
+ * read fails we just carry on, this isn't critical.
+ */
+ ret = read(3, &i, sizeof(i));
+ if (ret < 0)
+ fprintf(stderr, "read(startp pipe) failed: %s (%d)\n",
+ strerror(errno), errno);
+ if (ret > 0)
+ fprintf(stderr, "%d bytes of data on startup pipe\n",
+ ret);
+ close(3);
+
ret = execl(program, program, NULL);
fprintf(stderr, "execl(%s) failed: %d (%s)\n",
program, errno, strerror(errno));
@@ -465,6 +491,12 @@ int main(int argc, char **argv)
strerror(errno), ret);
epoll_fd = ret;
+ /* Create a pipe which children will block on before execing */
+ ret = pipe(startup_pipe);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n",
+ strerror(errno), errno);
+
/* Get signal handers ready before we start any children */
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = handle_exit_signal;
@@ -497,6 +529,13 @@ int main(int argc, char **argv)
}
}
+ /*
+ * All children started, close the startup pipe and let them
+ * run.
+ */
+ close(startup_pipe[0]);
+ close(startup_pipe[1]);
+
for (;;) {
/* Did we get a signal asking us to exit? */
if (terminate)
At present fp-stress has a bit of a thundering herd problem since the children it spawns start running immediately, meaning that they can start starving the parent process of CPU before it has even started all the children. This is much more severe on virtual platforms, since they tend to support far more SVE and SME vector lengths, are slower in general and, in some cases, perform poorly when simulating multiple CPUs. We can mitigate this problem by having all the child processes block before starting the test program, meaning that we at least have all the child processes started before we start heavily using CPU. We still have the same load issues while waiting for the actual stress test programs to start up and produce output, but they're at least all ready to go before that kicks in, resulting in substantial reductions in overall runtime on some of the severely affected systems. One test showed about a 20% improvement. Signed-off-by: Mark Brown <broonie@kernel.org> --- tools/testing/selftests/arm64/fp/fp-stress.c | 41 +++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-)