diff mbox series

[mdadm,1/2] mdadm: Fix hang race condition in wait_for_zero_forks()

Message ID 20240604163837.798219-2-logang@deltatee.com (mailing list archive)
State Accepted
Headers show
Series Bug fixes for --write-zeros option | expand

Checks

Context Check Description
mdraidci/vmtest-md-6_11-PR fail merge-conflict
mdraidci/vmtest-md-6_11-VM_Test-0 success Logs for Lint
mdraidci/vmtest-md-6_11-VM_Test-1 success Logs for ShellCheck
mdraidci/vmtest-md-6_11-VM_Test-3 success Logs for Validate matrix.py
mdraidci/vmtest-md-6_11-VM_Test-5 success Logs for x86_64-gcc / build / build for x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-2 success Logs for Unittests
mdraidci/vmtest-md-6_11-VM_Test-4 success Logs for set-matrix
mdraidci/vmtest-md-6_11-VM_Test-6 success Logs for x86_64-gcc / build-release
mdraidci/vmtest-md-6_11-VM_Test-7 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-8 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-9 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-10 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-15 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
mdraidci/vmtest-md-6_11-VM_Test-11 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-17 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
mdraidci/vmtest-md-6_11-VM_Test-27 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
mdraidci/vmtest-md-6_11-VM_Test-13 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-14 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
mdraidci/vmtest-md-6_11-VM_Test-26 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
mdraidci/vmtest-md-6_11-VM_Test-12 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
mdraidci/vmtest-md-6_11-VM_Test-21 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
mdraidci/vmtest-md-6_11-VM_Test-16 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
mdraidci/vmtest-md-6_11-VM_Test-23 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
mdraidci/vmtest-md-6_11-VM_Test-28 success Logs for x86_64-llvm-18 / veristat
mdraidci/vmtest-md-6_11-VM_Test-20 success Logs for x86_64-llvm-17 / veristat
mdraidci/vmtest-md-6_11-VM_Test-24 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
mdraidci/vmtest-md-6_11-VM_Test-19 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
mdraidci/vmtest-md-6_11-VM_Test-22 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
mdraidci/vmtest-md-6_11-VM_Test-18 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
mdraidci/vmtest-md-6_11-VM_Test-25 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18

Commit Message

Logan Gunthorpe June 4, 2024, 4:38 p.m. UTC
Running a create operation with --write-zeros can randomly hang
forever waiting for child processes. This happens roughly on in
ten runs with when running with small (20MB) loop devices.

The bug is caused by the fact that signals can be coallesced into
one if they are not read by signalfd quick enough. So if two children
finish at exactly the same time, only one SIGCHLD will be received
by the parent.

To fix this, wait on all processes with WNOHANG every time a SIGCHLD
is recieved and exit when all processes have been waited on.

Reported-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
---
 Create.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

Comments

Paul Menzel June 4, 2024, 9:34 p.m. UTC | #1
Dear Logan,


Thank you for the patch.


Am 04.06.24 um 18:38 schrieb Logan Gunthorpe:
> Running a create operation with --write-zeros can randomly hang
> forever waiting for child processes. This happens roughly on in
> ten runs with when running with small (20MB) loop devices.
> 
> The bug is caused by the fact that signals can be coallesced into
> one if they are not read by signalfd quick enough. So if two children
> finish at exactly the same time, only one SIGCHLD will be received
> by the parent.
> 
> To fix this, wait on all processes with WNOHANG every time a SIGCHLD
> is recieved and exit when all processes have been waited on.

received

> Reported-by: Xiao Ni <xni@redhat.com>
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> ---
>   Create.c | 28 +++++++++++++++-------------
>   1 file changed, 15 insertions(+), 13 deletions(-)
> 
> diff --git a/Create.c b/Create.c
> index d033eb68f30c..4f992a22b7c9 100644
> --- a/Create.c
> +++ b/Create.c
> @@ -178,6 +178,7 @@ static int wait_for_zero_forks(int *zero_pids, int count)
>   	bool interrupted = false;
>   	sigset_t sigset;
>   	ssize_t s;
> +	pid_t pid;
>   
>   	for (i = 0; i < count; i++)
>   		if (zero_pids[i])
> @@ -196,7 +197,7 @@ static int wait_for_zero_forks(int *zero_pids, int count)
>   		return 1;
>   	}
>   
> -	while (1) {
> +	while (wait_count) {
>   		s = read(sfd, &fdsi, sizeof(fdsi));
>   		if (s != sizeof(fdsi)) {
>   			pr_err("Invalid signalfd read: %s\n", strerror(errno));
> @@ -209,23 +210,24 @@ static int wait_for_zero_forks(int *zero_pids, int count)
>   			pr_info("Interrupting zeroing processes, please wait...\n");
>   			interrupted = true;
>   		} else if (fdsi.ssi_signo == SIGCHLD) {
> -			if (!--wait_count)
> -				break;
> +			for (i = 0; i < count; i++) {
> +				if (!zero_pids[i])
> +					continue;
> +
> +				pid = waitpid(zero_pids[i], &wstatus, WNOHANG);
> +				if (pid <= 0)
> +					continue;
> +
> +				zero_pids[i] = 0;
> +				if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
> +					ret = 1;
> +				wait_count--;
> +			}
>   		}
>   	}
>   
>   	close(sfd);
>   
> -	for (i = 0; i < count; i++) {
> -		if (!zero_pids[i])
> -			continue;
> -
> -		waitpid(zero_pids[i], &wstatus, 0);
> -		zero_pids[i] = 0;
> -		if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
> -			ret = 1;
> -	}
> -
>   	if (interrupted) {
>   		pr_err("zeroing interrupted!\n");
>   		return 1;

Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>


Kind regards,

Paul
diff mbox series

Patch

diff --git a/Create.c b/Create.c
index d033eb68f30c..4f992a22b7c9 100644
--- a/Create.c
+++ b/Create.c
@@ -178,6 +178,7 @@  static int wait_for_zero_forks(int *zero_pids, int count)
 	bool interrupted = false;
 	sigset_t sigset;
 	ssize_t s;
+	pid_t pid;
 
 	for (i = 0; i < count; i++)
 		if (zero_pids[i])
@@ -196,7 +197,7 @@  static int wait_for_zero_forks(int *zero_pids, int count)
 		return 1;
 	}
 
-	while (1) {
+	while (wait_count) {
 		s = read(sfd, &fdsi, sizeof(fdsi));
 		if (s != sizeof(fdsi)) {
 			pr_err("Invalid signalfd read: %s\n", strerror(errno));
@@ -209,23 +210,24 @@  static int wait_for_zero_forks(int *zero_pids, int count)
 			pr_info("Interrupting zeroing processes, please wait...\n");
 			interrupted = true;
 		} else if (fdsi.ssi_signo == SIGCHLD) {
-			if (!--wait_count)
-				break;
+			for (i = 0; i < count; i++) {
+				if (!zero_pids[i])
+					continue;
+
+				pid = waitpid(zero_pids[i], &wstatus, WNOHANG);
+				if (pid <= 0)
+					continue;
+
+				zero_pids[i] = 0;
+				if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
+					ret = 1;
+				wait_count--;
+			}
 		}
 	}
 
 	close(sfd);
 
-	for (i = 0; i < count; i++) {
-		if (!zero_pids[i])
-			continue;
-
-		waitpid(zero_pids[i], &wstatus, 0);
-		zero_pids[i] = 0;
-		if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
-			ret = 1;
-	}
-
 	if (interrupted) {
 		pr_err("zeroing interrupted!\n");
 		return 1;