diff mbox series

[v1] selftests/harness: Fix tests timeout and race condition

Message ID 20240621180605.834676-1-mic@digikod.net (mailing list archive)
State New
Headers show
Series [v1] selftests/harness: Fix tests timeout and race condition | expand

Commit Message

Mickaël Salaün June 21, 2024, 6:06 p.m. UTC
We cannot use CLONE_VFORK because we also need to wait for the timeout
signal.

Restore tests timeout by using the original fork() call in __run_test()
but also in __TEST_F_IMPL().  Also fix a race condition when waiting for
the test child process.

Because test metadata are shared between test processes, only the
parent process must set the test PID (child).  Otherwise, t->pid may be
set to zero, leading to inconsistent error cases:

  #  RUN           layout1.rule_on_mountpoint ...
  # rule_on_mountpoint: Test ended in some other way [127]
  #            OK  layout1.rule_on_mountpoint
  ok 20 layout1.rule_on_mountpoint

As safeguards, initialize the "status" variable with a valid exit code,
and handle unknown test exits as errors.

The use of fork() introduces a new race condition in landlock/fs_test.c
which seems to be specific to hostfs bind mounts, but I haven't found
the root cause and it's difficult to trigger.  I'll try to fix it with
another patch.

Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Will Drewry <wad@chromium.org>
Cc: stable@vger.kernel.org
Closes: https://lore.kernel.org/r/9341d4db-5e21-418c-bf9e-9ae2da7877e1@sirena.org.uk
Fixes: a86f18903db9 ("selftests/harness: Fix interleaved scheduling leading to race conditions")
Fixes: 24cf65a62266 ("selftests/harness: Share _metadata between forked processes")
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20240621180605.834676-1-mic@digikod.net
---
 tools/testing/selftests/kselftest_harness.h | 43 ++++++++++++---------
 1 file changed, 24 insertions(+), 19 deletions(-)


base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670

Comments

Mickaël Salaün June 25, 2024, 7:30 a.m. UTC | #1
I pushed it to my next branch.

Mark, Shuah, and others, please let me know if kselftest and KernelCI
are better with that.

On Fri, Jun 21, 2024 at 08:06:05PM +0200, Mickaël Salaün wrote:
> We cannot use CLONE_VFORK because we also need to wait for the timeout
> signal.
> 
> Restore tests timeout by using the original fork() call in __run_test()
> but also in __TEST_F_IMPL().  Also fix a race condition when waiting for
> the test child process.
> 
> Because test metadata are shared between test processes, only the
> parent process must set the test PID (child).  Otherwise, t->pid may be
> set to zero, leading to inconsistent error cases:
> 
>   #  RUN           layout1.rule_on_mountpoint ...
>   # rule_on_mountpoint: Test ended in some other way [127]
>   #            OK  layout1.rule_on_mountpoint
>   ok 20 layout1.rule_on_mountpoint
> 
> As safeguards, initialize the "status" variable with a valid exit code,
> and handle unknown test exits as errors.
> 
> The use of fork() introduces a new race condition in landlock/fs_test.c
> which seems to be specific to hostfs bind mounts, but I haven't found
> the root cause and it's difficult to trigger.  I'll try to fix it with
> another patch.
> 
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Günther Noack <gnoack@google.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Mark Brown <broonie@kernel.org>
> Cc: Shuah Khan <shuah@kernel.org>
> Cc: Will Drewry <wad@chromium.org>
> Cc: stable@vger.kernel.org
> Closes: https://lore.kernel.org/r/9341d4db-5e21-418c-bf9e-9ae2da7877e1@sirena.org.uk
> Fixes: a86f18903db9 ("selftests/harness: Fix interleaved scheduling leading to race conditions")
> Fixes: 24cf65a62266 ("selftests/harness: Share _metadata between forked processes")
> Signed-off-by: Mickaël Salaün <mic@digikod.net>
> Link: https://lore.kernel.org/r/20240621180605.834676-1-mic@digikod.net
> ---
>  tools/testing/selftests/kselftest_harness.h | 43 ++++++++++++---------
>  1 file changed, 24 insertions(+), 19 deletions(-)
> 
> diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
> index b634969cbb6f..40723a6a083f 100644
> --- a/tools/testing/selftests/kselftest_harness.h
> +++ b/tools/testing/selftests/kselftest_harness.h
> @@ -66,8 +66,6 @@
>  #include <sys/wait.h>
>  #include <unistd.h>
>  #include <setjmp.h>
> -#include <syscall.h>
> -#include <linux/sched.h>
>  
>  #include "kselftest.h"
>  
> @@ -82,17 +80,6 @@
>  #  define TH_LOG_ENABLED 1
>  #endif
>  
> -/* Wait for the child process to end but without sharing memory mapping. */
> -static inline pid_t clone3_vfork(void)
> -{
> -	struct clone_args args = {
> -		.flags = CLONE_VFORK,
> -		.exit_signal = SIGCHLD,
> -	};
> -
> -	return syscall(__NR_clone3, &args, sizeof(args));
> -}
> -
>  /**
>   * TH_LOG()
>   *
> @@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void)
>  		} \
>  		if (setjmp(_metadata->env) == 0) { \
>  			/* _metadata and potentially self are shared with all forks. */ \
> -			child = clone3_vfork(); \
> +			child = fork(); \
>  			if (child == 0) { \
>  				fixture_name##_setup(_metadata, self, variant->data); \
>  				/* Let setup failure terminate early. */ \
> @@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t)
>  		.sa_flags = SA_SIGINFO,
>  	};
>  	struct sigaction saved_action;
> -	int status;
> +	/*
> +	 * Sets status so that WIFEXITED(status) returns true and
> +	 * WEXITSTATUS(status) returns KSFT_FAIL.  This safe default value
> +	 * should never be evaluated because of the waitpid(2) check and
> +	 * SIGALRM handling.
> +	 */
> +	int status = KSFT_FAIL << 8;
> +	int child;
>  
>  	if (sigaction(SIGALRM, &action, &saved_action)) {
>  		t->exit_code = KSFT_FAIL;
> @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t)
>  	__active_test = t;
>  	t->timed_out = false;
>  	alarm(t->timeout);
> -	waitpid(t->pid, &status, 0);
> +	child = waitpid(t->pid, &status, 0);
> +	if (child == -1 && errno != EINTR) {
> +		t->exit_code = KSFT_FAIL;
> +		fprintf(TH_LOG_STREAM,
> +			"# %s: Failed to wait for PID %d (errno: %d)\n",
> +			t->name, t->pid, errno);
> +		return;
> +	}
> +
>  	alarm(0);
>  	if (sigaction(SIGALRM, &saved_action, NULL)) {
>  		t->exit_code = KSFT_FAIL;
> @@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t)
>  				WTERMSIG(status));
>  		}
>  	} else {
> +		t->exit_code = KSFT_FAIL;
>  		fprintf(TH_LOG_STREAM,
>  			"# %s: Test ended in some other way [%u]\n",
>  			t->name,
> @@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f,
>  	struct __test_xfail *xfail;
>  	char test_name[1024];
>  	const char *diagnostic;
> +	int child;
>  
>  	/* reset test struct */
>  	t->exit_code = KSFT_PASS;
> @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f,
>  	fflush(stdout);
>  	fflush(stderr);
>  
> -	t->pid = clone3_vfork();
> -	if (t->pid < 0) {
> +	child = fork();
> +	if (child < 0) {
>  		ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
>  		t->exit_code = KSFT_FAIL;
> -	} else if (t->pid == 0) {
> +	} else if (child == 0) {
>  		setpgrp();
>  		t->fn(t, variant);
>  		_exit(t->exit_code);
>  	} else {
> +		t->pid = child;
>  		__wait_for_test(t);
>  	}
>  	ksft_print_msg("         %4s  %s\n",
> 
> base-commit: 83a7eefedc9b56fe7bfeff13b6c7356688ffa670
> -- 
> 2.45.2
> 
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index b634969cbb6f..40723a6a083f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -66,8 +66,6 @@ 
 #include <sys/wait.h>
 #include <unistd.h>
 #include <setjmp.h>
-#include <syscall.h>
-#include <linux/sched.h>
 
 #include "kselftest.h"
 
@@ -82,17 +80,6 @@ 
 #  define TH_LOG_ENABLED 1
 #endif
 
-/* Wait for the child process to end but without sharing memory mapping. */
-static inline pid_t clone3_vfork(void)
-{
-	struct clone_args args = {
-		.flags = CLONE_VFORK,
-		.exit_signal = SIGCHLD,
-	};
-
-	return syscall(__NR_clone3, &args, sizeof(args));
-}
-
 /**
  * TH_LOG()
  *
@@ -437,7 +424,7 @@  static inline pid_t clone3_vfork(void)
 		} \
 		if (setjmp(_metadata->env) == 0) { \
 			/* _metadata and potentially self are shared with all forks. */ \
-			child = clone3_vfork(); \
+			child = fork(); \
 			if (child == 0) { \
 				fixture_name##_setup(_metadata, self, variant->data); \
 				/* Let setup failure terminate early. */ \
@@ -1016,7 +1003,14 @@  void __wait_for_test(struct __test_metadata *t)
 		.sa_flags = SA_SIGINFO,
 	};
 	struct sigaction saved_action;
-	int status;
+	/*
+	 * Sets status so that WIFEXITED(status) returns true and
+	 * WEXITSTATUS(status) returns KSFT_FAIL.  This safe default value
+	 * should never be evaluated because of the waitpid(2) check and
+	 * SIGALRM handling.
+	 */
+	int status = KSFT_FAIL << 8;
+	int child;
 
 	if (sigaction(SIGALRM, &action, &saved_action)) {
 		t->exit_code = KSFT_FAIL;
@@ -1028,7 +1022,15 @@  void __wait_for_test(struct __test_metadata *t)
 	__active_test = t;
 	t->timed_out = false;
 	alarm(t->timeout);
-	waitpid(t->pid, &status, 0);
+	child = waitpid(t->pid, &status, 0);
+	if (child == -1 && errno != EINTR) {
+		t->exit_code = KSFT_FAIL;
+		fprintf(TH_LOG_STREAM,
+			"# %s: Failed to wait for PID %d (errno: %d)\n",
+			t->name, t->pid, errno);
+		return;
+	}
+
 	alarm(0);
 	if (sigaction(SIGALRM, &saved_action, NULL)) {
 		t->exit_code = KSFT_FAIL;
@@ -1083,6 +1085,7 @@  void __wait_for_test(struct __test_metadata *t)
 				WTERMSIG(status));
 		}
 	} else {
+		t->exit_code = KSFT_FAIL;
 		fprintf(TH_LOG_STREAM,
 			"# %s: Test ended in some other way [%u]\n",
 			t->name,
@@ -1218,6 +1221,7 @@  void __run_test(struct __fixture_metadata *f,
 	struct __test_xfail *xfail;
 	char test_name[1024];
 	const char *diagnostic;
+	int child;
 
 	/* reset test struct */
 	t->exit_code = KSFT_PASS;
@@ -1236,15 +1240,16 @@  void __run_test(struct __fixture_metadata *f,
 	fflush(stdout);
 	fflush(stderr);
 
-	t->pid = clone3_vfork();
-	if (t->pid < 0) {
+	child = fork();
+	if (child < 0) {
 		ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
 		t->exit_code = KSFT_FAIL;
-	} else if (t->pid == 0) {
+	} else if (child == 0) {
 		setpgrp();
 		t->fn(t, variant);
 		_exit(t->exit_code);
 	} else {
+		t->pid = child;
 		__wait_for_test(t);
 	}
 	ksft_print_msg("         %4s  %s\n",